1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2020-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/gaudi2/gaudi2_special_blocks.h"
11 #include "../include/hw_ip/mmu/mmu_general.h"
12 #include "../include/hw_ip/mmu/mmu_v2_0.h"
13 #include "../include/gaudi2/gaudi2_packets.h"
14 #include "../include/gaudi2/gaudi2_reg_map.h"
15 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
16 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
17 
18 #include <linux/module.h>
19 #include <linux/pci.h>
20 #include <linux/hwmon.h>
21 #include <linux/iommu.h>
22 
23 #define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */
24 
25 #define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
26 #define GAUDI2_RESET_POLL_TIMEOUT_USEC		50000		/* 50ms */
27 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
28 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
29 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
30 #define GAUDI2_RESET_POLL_CNT			3
31 #define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
32 #define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
33 #define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
34 #define GAUDI2_CB_POOL_CB_CNT			512
35 #define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
36 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
37 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
38 #define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
39 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
40 
41 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3
42 
43 /*
44  * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
45  * and relies on that value (for array sizes etc.), we define a separate value for the
46  * maximum number of faulty TPCs which reflects the cluster binning requirements.
47  */
48 #define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
49 #define MAX_FAULTY_XBARS			1
50 #define MAX_FAULTY_EDMAS			1
51 #define MAX_FAULTY_DECODERS			1
52 
53 #define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
54 #define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
55 #define GAUDI2_DECODER_FULL_MASK		0x3FF
56 
57 #define GAUDI2_NA_EVENT_CAUSE			0xFF
58 #define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
59 #define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
60 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
61 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
62 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
63 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
64 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
65 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE		30
66 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
67 #define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
68 #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
69 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
70 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
71 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
72 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
73 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
74 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
75 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
76 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
77 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
78 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5
79 
80 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
81 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
82 #define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)
83 
84 #define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
85 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)
86 
87 #define KDMA_TIMEOUT_USEC			USEC_PER_SEC
88 
89 #define IS_DMA_IDLE(dma_core_idle_ind_mask)	\
90 	(!((dma_core_idle_ind_mask) &		\
91 	((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \
92 	(DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK))))
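/*
 * Note: per the mask above, a DMA core is considered idle only when both its
 * descriptor-count status and its completion indication are clear. The
 * DCORE0_EDMA0 field definitions are used here on the assumption that the
 * IDLE_IND_MASK layout is identical across all EDMA/PDMA cores.
 */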
93 
94 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
95 
96 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))
97 
98 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
99 	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
100 	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
101 	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))
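/*
 * Note: a QMAN is reported idle only when all three of its status words match
 * their idle patterns - the QM global status, the ARC global status and the
 * CGM status. A possible usage sketch (the register offset names below are
 * illustrative only, not taken from this file):
 *
 *	idle &= IS_QM_IDLE(RREG32(qm_base + QM_GLBL_STS0_OFFSET),
 *			   RREG32(qm_base + QM_GLBL_STS1_OFFSET),
 *			   RREG32(qm_base + QM_CGM_STS_OFFSET));
 */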
102 
103 #define PCIE_DEC_EN_MASK			0x300
104 #define DEC_WORK_STATE_IDLE			0
105 #define DEC_WORK_STATE_PEND			3
106 #define IS_DEC_IDLE(dec_swreg15) \
107 	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
108 	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) ==  DEC_WORK_STATE_PEND)
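/*
 * Note: both the IDLE (0) and PEND (3) software work states are treated as
 * idle here, i.e. a decoder that is merely waiting for new work is not
 * reported as busy.
 */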
109 
110 /* HBM MMU address scrambling parameters */
111 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
112 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
113 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
114 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
115 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
116 #define MMU_RANGE_INV_VA_LSB_SHIFT		12
117 #define MMU_RANGE_INV_VA_MSB_SHIFT		44
118 #define MMU_RANGE_INV_EN_SHIFT			0
119 #define MMU_RANGE_INV_ASID_EN_SHIFT		1
120 #define MMU_RANGE_INV_ASID_SHIFT		2
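/*
 * Illustrative composition of a range-invalidation value from the shifts
 * above (a sketch only - the actual invalidation flow is implemented
 * elsewhere in this driver):
 *
 *	u64 inv = (1ull << MMU_RANGE_INV_EN_SHIFT) |
 *		  (1ull << MMU_RANGE_INV_ASID_EN_SHIFT) |
 *		  ((u64)asid << MMU_RANGE_INV_ASID_SHIFT);
 */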
121 
122 /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU
123  * because it has only a 2-entry FIFO, and hence this cause is not enabled for it.
124  */
125 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
126 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
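/*
 * With GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE = 19, the HMMU mask enables all 19
 * cause bits (GENMASK(18, 0) == 0x7FFFF) while the PMMU mask drops the last
 * one, "burst_fifo_full" (GENMASK(17, 0) == 0x3FFFF).
 */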
127 
128 #define GAUDI2_MAX_STRING_LEN			64
129 
130 #define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
131 							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
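/*
 * Note: this count assumes the decoder interrupt numbers form one contiguous
 * block, from the first DCORE0 DEC0 "normal" vector up to and including the
 * last shared-decoder "abnormal" vector.
 */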
132 
133 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
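/*
 * Note: this is the engine-ID stride between successive dcores, so (assuming
 * every dcore keeps the same engine layout) the ID of a given engine in
 * dcore N is its dcore0 ID plus N * ENGINE_ID_DCORE_OFFSET.
 */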
134 
135 enum hl_pmmu_fatal_cause {
136 	LATENCY_RD_OUT_FIFO_OVERRUN,
137 	LATENCY_WR_OUT_FIFO_OVERRUN,
138 };
139 
140 enum hl_pcie_drain_ind_cause {
141 	LBW_AXI_DRAIN_IND,
142 	HBW_AXI_DRAIN_IND
143 };
144 
145 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
146 	[HBM_ID0] = 0xFFFC,
147 	[HBM_ID1] = 0xFFCF,
148 	[HBM_ID2] = 0xF7F7,
149 	[HBM_ID3] = 0x7F7F,
150 	[HBM_ID4] = 0xFCFF,
151 	[HBM_ID5] = 0xCFFF,
152 };
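/*
 * Note: each entry above appears to clear exactly two bits of
 * GAUDI2_HIF_HMMU_FULL_MASK (0xFFFF), i.e. the pair of HIF/HMMU units that
 * serves the binned-out HBM cluster is removed from the enabled mask.
 */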
153 
154 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
155 	[0] = HBM_ID0,
156 	[1] = HBM_ID1,
157 	[2] = HBM_ID4,
158 	[3] = HBM_ID5,
159 };
160 
161 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
162 	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
163 	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
164 	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
165 	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
166 	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
167 	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
168 	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
169 	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
170 };
171 
172 static const int gaudi2_qman_async_event_id[] = {
173 	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
174 	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
175 	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
176 	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
177 	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
178 	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
179 	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
180 	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
181 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
182 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
183 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
184 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
185 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
186 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
187 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
188 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
189 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
190 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
191 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
192 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
193 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
194 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
195 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
196 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
197 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
198 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
199 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
200 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
201 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
202 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
203 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
204 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
205 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
206 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
207 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
208 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
209 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
210 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
211 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
212 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
213 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
214 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
215 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
216 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
217 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
218 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
219 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
220 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
221 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
222 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
223 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
224 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
225 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
226 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
227 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
228 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
229 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
230 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
231 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
232 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
233 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
234 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
235 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
236 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
237 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
238 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
239 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
240 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
241 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
242 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
243 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
244 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
245 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
246 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
247 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
248 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
249 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
250 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
251 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
252 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
253 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
254 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
255 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
256 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
257 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
258 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
259 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
260 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
261 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
262 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
263 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
264 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
265 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
266 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
267 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
268 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
269 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
270 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
271 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
272 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
273 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
274 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
275 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
276 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
277 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
278 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
279 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
280 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
281 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
282 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
283 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
284 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
285 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
286 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
287 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
288 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
289 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
290 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
291 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
292 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
293 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
294 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
295 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
296 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
297 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
298 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
299 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
300 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
301 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
302 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
303 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
304 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
305 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
306 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
307 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
308 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
309 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
310 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
311 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
312 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
313 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
314 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
315 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
316 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
317 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
318 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
319 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
320 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
321 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
322 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
323 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
324 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
325 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
326 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
327 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
328 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
329 	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
330 	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
331 	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
332 	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
333 	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
334 	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
335 	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
336 	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
337 	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
338 	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
339 	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
340 	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
341 	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
342 	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
343 	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
344 	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
345 	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
346 	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
347 	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
348 	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
349 	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
350 	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
351 	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
352 	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
353 	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
354 	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
355 	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
356 	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
357 	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
358 	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
359 	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
360 	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
361 	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
362 	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
363 	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
364 	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
365 	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
366 	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
367 	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
368 	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
369 	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
370 	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
371 	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
372 	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
373 	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
374 	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
375 	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
376 	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
377 	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
378 	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
379 	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
380 	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
381 	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
382 	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
383 	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
384 	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
385 	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
386 	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
387 	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
388 	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
389 	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
390 	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
391 	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
392 	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
393 	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
394 	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
395 	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
396 	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
397 	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
398 	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
399 	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
400 	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
401 	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
402 	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
403 	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
404 	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
405 	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
406 	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
407 	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
408 	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
409 	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
410 	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
411 	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
412 	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
413 	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
414 	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
415 	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
416 	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
417 	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
418 	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
419 	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
420 	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
421 	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
422 	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
423 	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
424 	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
425 	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
426 	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
427 	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
428 	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
429 	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
430 	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
431 	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
432 	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
433 };
434 
435 static const int gaudi2_dma_core_async_event_id[] = {
436 	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
437 	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
438 	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
439 	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
440 	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
441 	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
442 	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
443 	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
444 	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
445 	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
446 	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
447 };
448 
449 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
450 	"qman sei intr",
451 	"arc sei intr"
452 };
453 
454 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
455 	"AXI_TERMINATOR WR",
456 	"AXI_TERMINATOR RD",
457 	"AXI SPLIT SEI Status"
458 };
459 
460 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
461 	"cbu_bresp_sei_intr_cause",
462 	"cbu_rresp_sei_intr_cause",
463 	"lbu_bresp_sei_intr_cause",
464 	"lbu_rresp_sei_intr_cause",
465 	"cbu_axi_split_intr_cause",
466 	"lbu_axi_split_intr_cause",
467 	"arc_ip_excptn_sei_intr_cause",
468 	"dmi_bresp_sei_intr_cause",
469 	"aux2apb_err_sei_intr_cause",
470 	"cfg_lbw_wr_terminated_intr_cause",
471 	"cfg_lbw_rd_terminated_intr_cause",
472 	"cfg_dccm_wr_terminated_intr_cause",
473 	"cfg_dccm_rd_terminated_intr_cause",
474 	"cfg_hbw_rd_terminated_intr_cause"
475 };
476 
477 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
478 	"msix_vcd_hbw_sei",
479 	"msix_l2c_hbw_sei",
480 	"msix_nrm_hbw_sei",
481 	"msix_abnrm_hbw_sei",
482 	"msix_vcd_lbw_sei",
483 	"msix_l2c_lbw_sei",
484 	"msix_nrm_lbw_sei",
485 	"msix_abnrm_lbw_sei",
486 	"apb_vcd_lbw_sei",
487 	"apb_l2c_lbw_sei",
488 	"apb_nrm_lbw_sei",
489 	"apb_abnrm_lbw_sei",
490 	"dec_sei",
491 	"dec_apb_sei",
492 	"trc_apb_sei",
493 	"lbw_mstr_if_sei",
494 	"axi_split_bresp_err_sei",
495 	"hbw_axi_wr_viol_sei",
496 	"hbw_axi_rd_viol_sei",
497 	"lbw_axi_wr_viol_sei",
498 	"lbw_axi_rd_viol_sei",
499 	"vcd_spi",
500 	"l2c_spi",
501 	"nrm_spi",
502 	"abnrm_spi",
503 };
504 
505 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
506 	"PQ AXI HBW error",
507 	"CQ AXI HBW error",
508 	"CP AXI HBW error",
509 	"CP error due to undefined OPCODE",
510 	"CP encountered STOP OPCODE",
511 	"CP AXI LBW error",
512 	"CP WRREG32 or WRBULK returned error",
513 	"N/A",
514 	"FENCE 0 inc over max value and clipped",
515 	"FENCE 1 inc over max value and clipped",
516 	"FENCE 2 inc over max value and clipped",
517 	"FENCE 3 inc over max value and clipped",
518 	"FENCE 0 dec under min value and clipped",
519 	"FENCE 1 dec under min value and clipped",
520 	"FENCE 2 dec under min value and clipped",
521 	"FENCE 3 dec under min value and clipped",
522 	"CPDMA Up overflow",
523 	"PQC L2H error"
524 };
525 
526 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
527 	"RSVD0",
528 	"CQ AXI HBW error",
529 	"CP AXI HBW error",
530 	"CP error due to undefined OPCODE",
531 	"CP encountered STOP OPCODE",
532 	"CP AXI LBW error",
533 	"CP WRREG32 or WRBULK returned error",
534 	"N/A",
535 	"FENCE 0 inc over max value and clipped",
536 	"FENCE 1 inc over max value and clipped",
537 	"FENCE 2 inc over max value and clipped",
538 	"FENCE 3 inc over max value and clipped",
539 	"FENCE 0 dec under min value and clipped",
540 	"FENCE 1 dec under min value and clipped",
541 	"FENCE 2 dec under min value and clipped",
542 	"FENCE 3 dec under min value and clipped",
543 	"CPDMA Up overflow",
544 	"RSVD17",
545 	"CQ_WR_IFIFO_CI_ERR",
546 	"CQ_WR_CTL_CI_ERR",
547 	"ARC_CQF_RD_ERR",
548 	"ARC_CQ_WR_IFIFO_CI_ERR",
549 	"ARC_CQ_WR_CTL_CI_ERR",
550 	"ARC_AXI_ERR",
551 	"CP_SWITCH_WDT_ERR"
552 };
553 
554 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
555 	"Choice push while full error",
556 	"Choice Q watchdog error",
557 	"MSG AXI LBW returned with error"
558 };
559 
560 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
561 	"qm_axi_err",
562 	"qm_trace_fence_events",
563 	"qm_sw_err",
564 	"qm_cp_sw_stop",
565 	"lbw_mstr_rresp_err",
566 	"lbw_mstr_bresp_err",
567 	"lbw_msg_slverr",
568 	"hbw_msg_slverr",
569 	"wbc_slverr",
570 	"hbw_mstr_rresp_err",
571 	"hbw_mstr_bresp_err",
572 	"sb_resp_intr",
573 	"mrsb_resp_intr",
574 	"core_dw_status_0",
575 	"core_dw_status_1",
576 	"core_dw_status_2",
577 	"core_dw_status_3",
578 	"core_dw_status_4",
579 	"core_dw_status_5",
580 	"core_dw_status_6",
581 	"core_dw_status_7",
582 	"async_arc2cpu_sei_intr",
583 };
584 
585 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
586 	"tpc_address_exceed_slm",
587 	"tpc_div_by_0",
588 	"tpc_spu_mac_overflow",
589 	"tpc_spu_addsub_overflow",
590 	"tpc_spu_abs_overflow",
591 	"tpc_spu_fma_fp_dst_nan",
592 	"tpc_spu_fma_fp_dst_inf",
593 	"tpc_spu_convert_fp_dst_nan",
594 	"tpc_spu_convert_fp_dst_inf",
595 	"tpc_spu_fp_dst_denorm",
596 	"tpc_vpu_mac_overflow",
597 	"tpc_vpu_addsub_overflow",
598 	"tpc_vpu_abs_overflow",
599 	"tpc_vpu_convert_fp_dst_nan",
600 	"tpc_vpu_convert_fp_dst_inf",
601 	"tpc_vpu_fma_fp_dst_nan",
602 	"tpc_vpu_fma_fp_dst_inf",
603 	"tpc_vpu_fp_dst_denorm",
604 	"tpc_assertions",
605 	"tpc_illegal_instruction",
606 	"tpc_pc_wrap_around",
607 	"tpc_qm_sw_err",
608 	"tpc_hbw_rresp_err",
609 	"tpc_hbw_bresp_err",
610 	"tpc_lbw_rresp_err",
611 	"tpc_lbw_bresp_err",
612 	"st_unlock_already_locked",
613 	"invalid_lock_access",
614 	"LD_L protection violation",
615 	"ST_L protection violation",
616 };
617 
618 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
619 	"agu_resp_intr",
620 	"qman_axi_err",
621 	"wap sei (wbc axi err)",
622 	"arc sei",
623 	"cfg access error",
624 	"qm_sw_err",
625 	"sbte_dbg_intr_0",
626 	"sbte_dbg_intr_1",
627 	"sbte_dbg_intr_2",
628 	"sbte_dbg_intr_3",
629 	"sbte_dbg_intr_4",
630 	"sbte_prtn_intr_0",
631 	"sbte_prtn_intr_1",
632 	"sbte_prtn_intr_2",
633 	"sbte_prtn_intr_3",
634 	"sbte_prtn_intr_4",
635 };
636 
637 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
638 	"i0",
639 	"i1",
640 	"i2",
641 	"i3",
642 	"i4",
643 };
644 
645 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
646 	"WBC ERR RESP_0",
647 	"WBC ERR RESP_1",
648 	"AP SOURCE POS INF",
649 	"AP SOURCE NEG INF",
650 	"AP SOURCE NAN",
651 	"AP RESULT POS INF",
652 	"AP RESULT NEG INF",
653 };
654 
655 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
656 	"HBW Read returned with error RRESP",
657 	"HBW write returned with error BRESP",
658 	"LBW write returned with error BRESP",
659 	"descriptor_fifo_overflow",
660 	"KDMA SB LBW Read returned with error",
661 	"KDMA WBC LBW Write returned with error",
662 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
663 	"WRONG CFG FOR COMMIT IN LIN DMA"
664 };
665 
666 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
667 	"HBW/LBW Read returned with error RRESP",
668 	"HBW/LBW write returned with error BRESP",
669 	"LBW write returned with error BRESP",
670 	"descriptor_fifo_overflow",
671 	"KDMA SB LBW Read returned with error",
672 	"KDMA WBC LBW Write returned with error",
673 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
674 	"WRONG CFG FOR COMMIT IN LIN DMA"
675 };
676 
677 struct gaudi2_sm_sei_cause_data {
678 	const char *cause_name;
679 	const char *log_name;
680 };
681 
682 static const struct gaudi2_sm_sei_cause_data
683 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
684 	{"calculated SO value overflow/underflow", "SOB ID"},
685 	{"payload address of monitor is not aligned to 4B", "monitor addr"},
686 	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
687 };
688 
689 static const char * const
690 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
691 	"LATENCY_RD_OUT_FIFO_OVERRUN",
692 	"LATENCY_WR_OUT_FIFO_OVERRUN",
693 };
694 
695 static const char * const
696 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
697 	"LATENCY_RD_OUT_FIFO_OVERRUN",
698 	"LATENCY_WR_OUT_FIFO_OVERRUN",
699 };
700 
701 static const char * const
702 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
703 	"AXI drain HBW",
704 	"AXI drain LBW",
705 };
706 
707 static const char * const
708 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
709 	"HBW error response",
710 	"LBW error response",
711 	"TLP is blocked by RR"
712 };
713 
714 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
715 	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
716 	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
717 	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
718 	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
719 	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
720 	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
721 	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
722 	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
723 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
724 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
725 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
726 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
727 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
728 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
729 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
730 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
731 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
732 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
733 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
734 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
735 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
736 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
737 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
738 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
739 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
740 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
741 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
742 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
743 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
744 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
745 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
746 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
747 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
748 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
749 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
750 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
751 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
752 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
753 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
754 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
755 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
756 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
757 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
758 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
759 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
760 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
761 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
762 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
763 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
764 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
765 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
766 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
767 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
768 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
769 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
770 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
771 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
772 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
773 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
774 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
775 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
776 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
777 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
778 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
779 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
780 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
781 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
782 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
783 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
784 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
785 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
786 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
787 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
788 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
789 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
790 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
791 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
792 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
793 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
794 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
795 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
796 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
797 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
798 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
799 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
800 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
801 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
802 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
803 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
804 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
805 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
806 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
807 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
808 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
809 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
810 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
811 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
812 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
813 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
814 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
815 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
816 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
817 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
818 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
819 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
820 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
821 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
822 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
823 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
824 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
825 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
826 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
827 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
828 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
829 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
830 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
831 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
832 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
833 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
834 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
835 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
836 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
837 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
838 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
839 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
840 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
841 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
842 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
843 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
844 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
845 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
846 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
847 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
848 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
849 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
850 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
851 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
852 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
853 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
854 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
855 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
856 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
857 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
858 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
859 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
860 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
861 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
862 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
863 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
864 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
865 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
866 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
867 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
868 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
869 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
870 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
871 	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
872 	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
873 	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
874 	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
875 	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
876 	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
877 	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
878 	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
879 	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
880 	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
881 	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
882 	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
883 	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
884 	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
885 	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
886 	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
887 	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
888 	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
889 	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
890 	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
891 	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
892 	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
893 	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
894 	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
895 	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
896 	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
897 	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
898 	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
899 	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
900 	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
901 	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
902 	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
903 	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
904 	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
905 	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
906 	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
907 	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
908 	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
909 	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
910 	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
911 	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
912 	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
913 	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
914 	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
915 	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
916 	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
917 	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
918 	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
919 	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
920 	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
921 	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
922 	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
923 	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
924 	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
925 	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
926 	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
927 	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
928 	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
929 	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
930 	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
931 	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
932 	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
933 	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
934 	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
935 	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
936 	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
937 	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
938 	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
939 	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
940 	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
941 	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
942 	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
943 	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
944 	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
945 	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
946 	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
947 	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
948 	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
949 	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
950 	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
951 	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
952 	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
953 	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
954 	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
955 	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
956 	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
957 	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
958 	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
959 	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
960 	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
961 	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
962 	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
963 	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
964 	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
965 	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
966 	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
967 	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
968 	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
969 	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
970 	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
971 	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
972 	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
973 	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
974 	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
975 };
976 
977 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
978 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
979 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
980 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
981 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
982 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
983 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
984 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
985 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
986 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
987 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
988 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
989 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
990 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
991 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
992 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
993 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
994 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
995 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
996 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
997 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
998 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
999 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
1000 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1001 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1002 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1003 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1004 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1005 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1006 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1007 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1008 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1009 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1010 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1011 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1012 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1013 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1014 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1015 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1016 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1017 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1018 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1019 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1020 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1021 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1022 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1023 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1024 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1025 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1026 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1027 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1028 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1029 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1030 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1031 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1032 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1033 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1034 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1035 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1036 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1037 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1038 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1039 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1040 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1041 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1042 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1043 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1044 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1045 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1046 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1047 };
1048 
1049 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1050 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1051 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1052 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1053 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1054 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1055 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1056 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1057 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1058 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1059 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1060 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1061 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1062 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1063 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1064 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1065 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1066 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1067 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1068 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1069 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1070 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1071 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1072 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1073 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1074 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1075 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1076 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1077 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1078 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1079 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1080 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1081 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1082 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1083 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1084 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1085 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1086 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1087 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1088 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1089 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1090 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1091 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1092 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1093 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1094 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1095 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1096 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1097 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1098 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1099 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1100 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1101 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1102 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1103 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1104 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1105 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1106 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1107 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1108 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1109 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1110 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1111 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1112 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1113 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1114 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1115 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1116 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1117 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1118 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1119 };
1120 
1121 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1122 	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1123 	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1124 	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1125 	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1126 };
1127 
1128 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1129 	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1130 	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1131 	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1132 	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1133 	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1134 	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1135 	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1136 	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1137 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1138 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1139 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1140 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1141 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1142 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1143 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1144 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1145 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1146 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1147 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1148 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1149 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1150 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1151 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1152 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1153 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1154 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1155 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1156 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1157 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1158 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1159 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1160 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1161 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1162 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1163 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1164 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1165 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1166 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1167 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1168 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1169 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1170 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1171 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1172 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1173 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1174 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1175 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1176 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1177 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1178 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1179 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1180 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1181 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1182 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1183 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1184 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1185 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1186 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1187 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1188 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1189 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1190 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1191 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1192 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1193 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1194 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1195 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1196 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1197 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1198 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1199 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1200 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1201 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1202 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1203 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1204 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1205 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1206 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1207 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1208 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1209 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1210 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1211 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1212 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1213 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1214 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1215 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1216 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1217 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1218 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1219 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1220 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1221 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1222 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1223 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1224 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1225 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1226 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1227 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1228 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1229 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1230 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1231 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1232 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1233 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1234 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1235 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1236 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1237 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1238 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1239 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1240 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1241 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1242 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1243 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1244 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1245 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1246 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1247 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1248 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1249 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1250 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1251 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1252 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1253 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1254 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1255 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1256 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1257 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1258 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1259 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1260 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1261 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1262 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1263 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1264 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1265 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1266 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1267 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1268 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1269 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1270 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1271 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1272 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1273 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1274 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1275 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1276 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1277 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1278 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1279 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1280 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1281 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1282 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1283 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1284 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1285 	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1286 	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1287 	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1288 	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1289 	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1290 	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1291 	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1292 	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1293 	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1294 	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1295 	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1296 	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1297 	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1298 	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1299 	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1300 	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1301 	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1302 	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1303 	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1304 	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1305 	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1306 	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1307 	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1308 	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1309 	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1310 	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1311 	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1312 	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1313 	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1314 	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1315 	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1316 	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1317 	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1318 	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1319 	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1320 	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1321 	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1322 	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1323 	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1324 	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1325 	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1326 	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1327 	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1328 	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1329 	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1330 	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1331 	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1332 	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1333 	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1334 	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1335 	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1336 	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1337 	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1338 	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1339 	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1340 	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1341 	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1342 	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1343 	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1344 	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1345 	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1346 	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1347 	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1348 	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1349 	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1350 	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1351 	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1352 	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1353 	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1354 	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1355 	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1356 	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1357 	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1358 	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1359 	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1360 	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1361 	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1362 	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1363 	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1364 	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1365 	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1366 	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1367 	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1368 	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1369 	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1370 	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1371 	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1372 	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1373 	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1374 	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1375 	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1376 	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1377 	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1378 	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1379 	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1380 	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1381 	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1382 	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1383 	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1384 	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1385 	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1386 	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1387 	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1388 	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1389 };
1390 
1391 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1392 	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1393 	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1394 	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1395 	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1396 	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1397 	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1398 	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1399 	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1400 	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1401 	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1402 	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1403 };
1404 
1405 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1406 	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1407 	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1408 	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1409 	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1410 };
1411 
1412 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1413 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1414 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1415 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1416 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1417 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1418 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1419 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1420 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1421 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1422 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1423 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1424 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1425 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1426 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1427 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1428 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1429 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1430 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1431 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1432 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1433 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1434 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1435 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1436 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1437 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1438 };
1439 
1440 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1441 	[ROTATOR_ID_0] = mmROT0_BASE,
1442 	[ROTATOR_ID_1] = mmROT1_BASE
1443 };
1444 
1445 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1446 	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1447 	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1448 	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1449 	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1450 	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1451 	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1452 	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1453 	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1454 	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1455 	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1456 	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1457 	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1458 	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1459 	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1460 	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1461 	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1462 	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1463 	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1464 	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1465 	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1466 	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1467 	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1468 	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1469 	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1470 	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1471 };
1472 
1473 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1474 	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1475 	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1476 };
1477 
1478 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1479 	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1480 	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1481 	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1482 	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1483 	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1484 	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1485 	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1486 	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1487 };
1488 
1489 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1490 	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1491 	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1492 	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1493 	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1494 	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1495 	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1496 	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1497 	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1498 	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1499 	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1500 };
1501 
1502 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = {
1503 	RTR_ID_X_Y(2, 4),
1504 	RTR_ID_X_Y(3, 4),
1505 	RTR_ID_X_Y(4, 4),
1506 	RTR_ID_X_Y(5, 4),
1507 	RTR_ID_X_Y(6, 4),
1508 	RTR_ID_X_Y(7, 4),
1509 	RTR_ID_X_Y(8, 4),
1510 	RTR_ID_X_Y(9, 4),
1511 	RTR_ID_X_Y(10, 4),
1512 	RTR_ID_X_Y(11, 4),
1513 	RTR_ID_X_Y(12, 4),
1514 	RTR_ID_X_Y(13, 4),
1515 	RTR_ID_X_Y(14, 4),
1516 	RTR_ID_X_Y(15, 4),
1517 	RTR_ID_X_Y(16, 4),
1518 	RTR_ID_X_Y(17, 4),
1519 	RTR_ID_X_Y(2, 11),
1520 	RTR_ID_X_Y(3, 11),
1521 	RTR_ID_X_Y(4, 11),
1522 	RTR_ID_X_Y(5, 11),
1523 	RTR_ID_X_Y(6, 11),
1524 	RTR_ID_X_Y(7, 11),
1525 	RTR_ID_X_Y(8, 11),
1526 	RTR_ID_X_Y(9, 11),
1527 	RTR_ID_X_Y(0, 0),/* 24 no id */
1528 	RTR_ID_X_Y(0, 0),/* 25 no id */
1529 	RTR_ID_X_Y(0, 0),/* 26 no id */
1530 	RTR_ID_X_Y(0, 0),/* 27 no id */
1531 	RTR_ID_X_Y(14, 11),
1532 	RTR_ID_X_Y(15, 11),
1533 	RTR_ID_X_Y(16, 11),
1534 	RTR_ID_X_Y(17, 11)
1535 };
1536 
1537 enum rtr_id {
1538 	DCORE0_RTR0,
1539 	DCORE0_RTR1,
1540 	DCORE0_RTR2,
1541 	DCORE0_RTR3,
1542 	DCORE0_RTR4,
1543 	DCORE0_RTR5,
1544 	DCORE0_RTR6,
1545 	DCORE0_RTR7,
1546 	DCORE1_RTR0,
1547 	DCORE1_RTR1,
1548 	DCORE1_RTR2,
1549 	DCORE1_RTR3,
1550 	DCORE1_RTR4,
1551 	DCORE1_RTR5,
1552 	DCORE1_RTR6,
1553 	DCORE1_RTR7,
1554 	DCORE2_RTR0,
1555 	DCORE2_RTR1,
1556 	DCORE2_RTR2,
1557 	DCORE2_RTR3,
1558 	DCORE2_RTR4,
1559 	DCORE2_RTR5,
1560 	DCORE2_RTR6,
1561 	DCORE2_RTR7,
1562 	DCORE3_RTR0,
1563 	DCORE3_RTR1,
1564 	DCORE3_RTR2,
1565 	DCORE3_RTR3,
1566 	DCORE3_RTR4,
1567 	DCORE3_RTR5,
1568 	DCORE3_RTR6,
1569 	DCORE3_RTR7,
1570 };
1571 
1572 static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1573 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1574 	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1575 	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1576 	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1577 	DCORE0_RTR0
1578 };
1579 
1580 static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1581 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
1582 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
1583 	DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
1584 	DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
1585 	DCORE0_RTR0
1586 };
1587 
1588 static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
1589 	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1590 	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1591 };
1592 
1593 static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
1594 	DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
1595 	DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
1596 };
1597 
1598 static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
1599 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1600 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1601 };
1602 
1603 static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
1604 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1605 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1606 };
1607 
1608 static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1609 	mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1610 	mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1611 	mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1612 	mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1613 	mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1614 	mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1615 	mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1616 	mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
1617 };
1618 
1619 static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
1620 	DCORE0_RTR0, DCORE0_RTR0
1621 };
1622 
1623 static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
1624 	DCORE0_RTR2, DCORE0_RTR2
1625 };
1626 
1627 static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
1628 	DCORE2_RTR0, DCORE3_RTR7
1629 };
1630 
1631 static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
1632 	DCORE2_RTR2, DCORE3_RTR5
1633 };
1634 
1635 struct mme_initiators_rtr_id {
1636 	u32 wap0;
1637 	u32 wap1;
1638 	u32 write;
1639 	u32 read;
1640 	u32 sbte0;
1641 	u32 sbte1;
1642 	u32 sbte2;
1643 	u32 sbte3;
1644 	u32 sbte4;
1645 };
1646 
1647 enum mme_initiators {
1648 	MME_WAP0 = 0,
1649 	MME_WAP1,
1650 	MME_WRITE,
1651 	MME_READ,
1652 	MME_SBTE0,
1653 	MME_SBTE1,
1654 	MME_SBTE2,
1655 	MME_SBTE3,
1656 	MME_SBTE4,
1657 	MME_INITIATORS_MAX
1658 };
1659 
1660 static const struct mme_initiators_rtr_id
1661 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1662 	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1663 	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1664 	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1665 	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1666 	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1667 	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1668 	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1669 	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1670 };
1671 
1672 enum razwi_event_sources {
1673 	RAZWI_TPC,
1674 	RAZWI_MME,
1675 	RAZWI_EDMA,
1676 	RAZWI_PDMA,
1677 	RAZWI_NIC,
1678 	RAZWI_DEC,
1679 	RAZWI_ROT
1680 };
1681 
1682 struct hbm_mc_error_causes {
1683 	u32 mask;
1684 	char cause[50];
1685 };
1686 
1687 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
1688 
1689 /* Special blocks iterator is currently used to configure security protection bits
1690  * and to read global errors. Most HW blocks are addressable; those that aren't (N/A)
1691  * must be skipped. The following configurations are commonly used for both PB config
1692  * and global error reading, since currently they both share the same settings.
1693  * Once that changes, we must remember to use separate configurations for each.
1694  */
1695 static int gaudi2_iterator_skip_block_types[] = {
1696 		GAUDI2_BLOCK_TYPE_PLL,
1697 		GAUDI2_BLOCK_TYPE_EU_BIST,
1698 		GAUDI2_BLOCK_TYPE_HBM,
1699 		GAUDI2_BLOCK_TYPE_XFT
1700 };
1701 
1702 static struct range gaudi2_iterator_skip_block_ranges[] = {
1703 		/* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
1704 		{mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
1705 		{mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
1706 		/* Skip all CPU blocks except for CPU_IF */
1707 		{mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
1708 		{mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
1709 };
1710 
1711 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
1712 	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
1713 	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
1714 	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
1715 	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
1716 	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
1717 };
1718 
1719 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
1720 	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
1721 	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
1722 	[HBM_SEI_READ_ERR] = "SEI read data error",
1723 	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
1724 	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
1725 	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
1726 	[HBM_SEI_DFI] = "SEI DFI error",
1727 	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
1728 	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
1729 };
1730 
1731 struct mmu_spi_sei_cause {
1732 	char cause[50];
1733 	int clear_bit;
1734 };
1735 
1736 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
1737 	{"page fault", 1},		/* INTERRUPT_CLR[1] */
1738 	{"page access", 1},		/* INTERRUPT_CLR[1] */
1739 	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
1740 	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
1741 	{"mmu rei0", -1},		/* no clear register bit */
1742 	{"mmu rei1", -1},		/* no clear register bit */
1743 	{"stlb rei0", -1},		/* no clear register bit */
1744 	{"stlb rei1", -1},		/* no clear register bit */
1745 	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
1746 	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
1747 	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
1748 	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
1749 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1750 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1751 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1752 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1753 	{"slave error", 16},		/* INTERRUPT_CLR[16] */
1754 	{"dec error", 17},		/* INTERRUPT_CLR[17] */
1755 	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
1756 };
1757 
1758 struct gaudi2_cache_invld_params {
1759 	u64 start_va;
1760 	u64 end_va;
1761 	u32 inv_start_val;
1762 	u32 flags;
1763 	bool range_invalidation;
1764 };
1765 
1766 struct gaudi2_tpc_idle_data {
1767 	struct engines_data *e;
1768 	unsigned long *mask;
1769 	bool *is_idle;
1770 	const char *tpc_fmt;
1771 };
1772 
1773 struct gaudi2_tpc_mmu_data {
1774 	u32 rw_asid;
1775 };
1776 
1777 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
1778 
1779 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
1780 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
1781 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
1782 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1783 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1784 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
1785 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
1786 										bool is_memset);
1787 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
1788 
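/*
 * Intentionally left empty: no driver-side HBM scrambler configuration is
 * performed for Gaudi2 (assumed to be handled by the firmware); the empty
 * stub keeps the init flow uniform.
 */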
1789 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
1790 {
1791 
1792 }
1793 
1794 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
1795 {
1796 	return sizeof(struct packet_msg_short);
1797 }
1798 
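/*
 * A wait CB consists of 4 MSG_SHORT packets (used to set up and arm a
 * monitor) followed by a single FENCE packet, hence the size computed below.
 */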
1799 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
1800 {
1801 	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
1802 }
1803 
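/*
 * Iterate over all enabled TPC engines: walk every DCORE's TPCs, skipping
 * those cleared in the TPC enabled mask, and then handle the extra PCI TPC
 * (DCORE0 TPC6) separately. Iteration aborts on the first callback failure.
 */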
1804 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
1805 {
1806 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1807 	int dcore, inst, tpc_seq;
1808 	u32 offset;
1809 
1810 	/* init the return code */
1811 	ctx->rc = 0;
1812 
1813 	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
1814 		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
1815 			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
1816 
1817 			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
1818 				continue;
1819 
1820 			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
1821 
1822 			ctx->fn(hdev, dcore, inst, offset, ctx);
1823 			if (ctx->rc) {
1824 				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
1825 							dcore, inst);
1826 				return;
1827 			}
1828 		}
1829 	}
1830 
1831 	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
1832 		return;
1833 
1834 	/* special check for PCI TPC (DCORE0_TPC6) */
1835 	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
1836 	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
1837 	if (ctx->rc)
1838 		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
1839 }
1840 
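/*
 * A host physical address is valid if it falls below the end of the first
 * host physical range or at/above the start of the second one; addresses in
 * the hole between the two ranges are rejected.
 */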
1841 static bool gaudi2_host_phys_addr_valid(u64 addr)
1842 {
1843 	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
1844 		return true;
1845 
1846 	return false;
1847 }
1848 
1849 static int set_number_of_functional_hbms(struct hl_device *hdev)
1850 {
1851 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1852 	u8 faulty_hbms = hweight64(hdev->dram_binning);
1853 
1854 	/* check if all HBMs should be used */
1855 	if (!faulty_hbms) {
1856 		dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
1857 		prop->num_functional_hbms = GAUDI2_HBM_NUM;
1858 		return 0;
1859 	}
1860 
1861 	/*
1862 	 * check for error condition in which number of binning
1863 	 * candidates is higher than the maximum supported by the
1864 	 * driver (in which case binning mask shall be ignored and driver will
1865 	 * set the default)
1866 	 */
1867 	if (faulty_hbms > MAX_FAULTY_HBMS) {
1868 		dev_err(hdev->dev,
1869 			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
1870 			MAX_FAULTY_HBMS, hdev->dram_binning);
1871 		return -EINVAL;
1872 	}
1873 
1874 	/*
1875 	 * with binning in effect, the number of functional HBMs is GAUDI2_HBM_NUM
1876 	 * minus the number of faulty ones (at most one, i.e. GAUDI2_HBM_NUM - 1).
1877 	 */
1878 	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
1879 	return 0;
1880 }
1881 
1882 static int gaudi2_set_dram_properties(struct hl_device *hdev)
1883 {
1884 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1885 	u32 basic_hbm_page_size;
1886 	int rc;
1887 
1888 	rc = set_number_of_functional_hbms(hdev);
1889 	if (rc)
1890 		return -EINVAL;
1891 
1892 	/*
1893 	 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround
1894 	 * in which we are using x16 bigger page size to be able to populate the entire
1895 	 * HBM mappings in the TLB
1896 	 */
1897 	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
1898 	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
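	/*
	 * e.g. with all 6 HBMs functional: 6 * 8MB = 48MB basic page size,
	 * scaled by the x16 compensation factor above to a 768MB DRAM page.
	 */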
1899 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
1900 	prop->dram_size = prop->num_functional_hbms * SZ_16G;
1901 	prop->dram_base_address = DRAM_PHYS_BASE;
1902 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
1903 	prop->dram_supports_virtual_memory = true;
1904 
1905 	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
1906 	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
1907 	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
1908 	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
1909 
1910 	/* since DRAM page size differs from DMMU page size we need to allocate
1911 	 * DRAM memory in units of dram_page size and mapping this memory in
1912 	 * units of DMMU page size. we overcome this size mismatch using a
1913 	 * scrambling routine which takes a DRAM page and converts it to a DMMU
1914 	 * page.
1915 	 * We therefore:
1916 	 * 1. partition the virtual address space to DRAM-page (whole) pages.
1917 	 *    (suppose we get n such pages)
1918 	 * 2. limit the amount of virtual address space we got from 1 above to
1919 	 *    a multiple of 64M as we don't want the scrambled address to cross
1920 	 *    the DRAM virtual address space.
1921 	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
1922 	 * 3. determine the end address accordingly
1923 	 *    end_addr = start_addr + m * 48M
1924 	 *
1925 	 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
1926 	 */
1927 	prop->dmmu.start_addr = prop->dram_base_address +
1928 			(prop->dram_page_size *
1929 				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
1930 
1931 	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
1932 			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
1933 
1934 	return 0;
1935 }
1936 
1937 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
1938 {
1939 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1940 	struct hw_queue_properties *q_props;
1941 	u32 num_sync_stream_queues = 0;
1942 	int i;
1943 
1944 	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
1945 	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
1946 					GFP_KERNEL);
1947 
1948 	if (!prop->hw_queues_props)
1949 		return -ENOMEM;
1950 
1951 	q_props = prop->hw_queues_props;
1952 
1953 	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
1954 		q_props[i].type = QUEUE_TYPE_HW;
1955 		q_props[i].driver_only = 0;
1956 
1957 		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
1958 			q_props[i].supports_sync_stream = 0;
1959 		} else {
1960 			q_props[i].supports_sync_stream = 1;
1961 			num_sync_stream_queues++;
1962 		}
1963 
1964 		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
1965 	}
1966 
1967 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
1968 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
1969 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
1970 
1971 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
1972 	prop->cfg_base_address = CFG_BASE;
1973 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
1974 	prop->host_base_address = HOST_PHYS_BASE_0;
1975 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
1976 	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
1977 	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
1978 	prop->user_dec_intr_count = NUMBER_OF_DEC;
1979 	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
1980 	prop->completion_mode = HL_COMPLETION_MODE_CS;
1981 	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
1982 	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
1983 
1984 	prop->sram_base_address = SRAM_BASE_ADDR;
1985 	prop->sram_size = SRAM_SIZE;
1986 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
1987 	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
1988 
1989 	prop->hints_range_reservation = true;
1990 
1991 	if (hdev->pldm)
1992 		prop->mmu_pgt_size = 0x800000; /* 8MB */
1993 	else
1994 		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
1995 
1996 	prop->mmu_pte_size = HL_PTE_SIZE;
1997 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
1998 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
1999 
2000 	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
2001 	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
2002 	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
2003 	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
2004 	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
2005 	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
2006 	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
2007 	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
2008 	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
2009 	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
2010 	prop->dmmu.page_size = PAGE_SIZE_1GB;
2011 	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
2012 	prop->dmmu.last_mask = LAST_MASK;
2013 	prop->dmmu.host_resident = 1;
2014 	/* TODO: will be duplicated until implementing per-MMU props */
2015 	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
2016 	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2017 
2018 	/*
2019 	 * this is done in order to be able to validate the FW descriptor (i.e. validating that
2020 	 * the addresses and allocated space for the FW image do not cross memory bounds).
2021 	 * for this reason we set the DRAM size to the minimum possible, and later it will
2022 	 * be modified according to what is reported in the cpucp info packet
2023 	 */
2024 	prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
2025 
2026 	hdev->pmmu_huge_range = true;
2027 	prop->pmmu.host_resident = 1;
2028 	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
2029 	prop->pmmu.last_mask = LAST_MASK;
2030 	/* TODO: will be duplicated until implementing per-MMU props */
2031 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
2032 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2033 
2034 	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
2035 	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
2036 	prop->hints_host_hpage_reserved_va_range.start_addr =
2037 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
2038 	prop->hints_host_hpage_reserved_va_range.end_addr =
2039 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
2040 
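	/*
	 * host (PMMU) hop shifts/masks and page sizes depend on the kernel's base
	 * page size: 64KB kernels use the 64K layout with 16MB huge pages, other
	 * kernels use the 4K layout with 2MB huge pages.
	 */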
2041 	if (PAGE_SIZE == SZ_64K) {
2042 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
2043 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
2044 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
2045 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
2046 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
2047 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
2048 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
2049 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
2050 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
2051 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2052 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2053 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2054 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2055 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2056 		prop->pmmu.page_size = PAGE_SIZE_64KB;
2057 
2058 		/* shifts and masks are the same in PMMU and HPMMU */
2059 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2060 		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2061 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2062 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2063 	} else {
2064 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2065 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2066 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2067 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2068 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2069 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2070 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2071 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2072 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2073 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2074 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2075 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2076 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2077 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2078 		prop->pmmu.page_size = PAGE_SIZE_4KB;
2079 
2080 		/* shifts and masks are the same in PMMU and HPMMU */
2081 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2082 		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2083 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2084 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2085 	}
2086 
2087 	prop->num_engine_cores = CPU_ID_MAX;
2088 	prop->cfg_size = CFG_SIZE;
2089 	prop->max_asid = MAX_ASID;
2090 	prop->num_of_events = GAUDI2_EVENT_SIZE;
2091 
2092 	prop->dc_power_default = DC_POWER_DEFAULT;
2093 
2094 	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2095 	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2096 	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2097 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2098 
2099 	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2100 
2101 	prop->mme_master_slave_mode = 1;
2102 
2103 	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2104 					(num_sync_stream_queues * HL_RSVD_SOBS);
2105 
2106 	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2107 					(num_sync_stream_queues * HL_RSVD_MONS);
2108 
2109 	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2110 
2111 	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2112 
2113 	prop->fw_cpu_boot_dev_sts0_valid = false;
2114 	prop->fw_cpu_boot_dev_sts1_valid = false;
2115 	prop->hard_reset_done_by_fw = false;
2116 	prop->gic_interrupts_enable = true;
2117 
2118 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2119 
2120 	prop->max_dec = NUMBER_OF_DEC;
2121 
2122 	prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2123 
2124 	prop->dma_mask = 64;
2125 
2126 	prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2127 
2128 	return 0;
2129 }
2130 
2131 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2132 {
2133 	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2134 	bool is_wc[3] = {false, false, true};
2135 	int rc;
2136 
2137 	rc = hl_pci_bars_map(hdev, name, is_wc);
2138 	if (rc)
2139 		return rc;
2140 
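	/*
	 * The CFG/SRAM BAR is mapped starting from STM_FLASH_BASE_ADDR (see
	 * gaudi2_init_iatu()), so the CFG registers sit at this fixed offset
	 * within it.
	 */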
2141 	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2142 
2143 	return 0;
2144 }
2145 
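/*
 * Re-point the DRAM BAR (inbound region 2) at the given device address and
 * return the previous base address, or U64_MAX if the region cannot be
 * changed (iATU is owned by the FW) or the update failed.
 */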
2146 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2147 {
2148 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2149 	struct hl_inbound_pci_region pci_region;
2150 	u64 old_addr = addr;
2151 	int rc;
2152 
2153 	if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2154 		return old_addr;
2155 
2156 	if (hdev->asic_prop.iatu_done_by_fw)
2157 		return U64_MAX;
2158 
2159 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2160 	pci_region.mode = PCI_BAR_MATCH_MODE;
2161 	pci_region.bar = DRAM_BAR_ID;
2162 	pci_region.addr = addr;
2163 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2164 	if (rc)
2165 		return U64_MAX;
2166 
2167 	if (gaudi2) {
2168 		old_addr = gaudi2->dram_bar_cur_addr;
2169 		gaudi2->dram_bar_cur_addr = addr;
2170 	}
2171 
2172 	return old_addr;
2173 }
2174 
2175 static int gaudi2_init_iatu(struct hl_device *hdev)
2176 {
2177 	struct hl_inbound_pci_region inbound_region;
2178 	struct hl_outbound_pci_region outbound_region;
2179 	u32 bar_addr_low, bar_addr_high;
2180 	int rc;
2181 
2182 	if (hdev->asic_prop.iatu_done_by_fw)
2183 		return 0;
2184 
2185 	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
2186 	 * We must map this region in BAR match mode in order to
2187 	 * fetch BAR physical base address
2188 	 */
2189 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2190 	inbound_region.bar = SRAM_CFG_BAR_ID;
2191 	/* Base address must be aligned to Bar size which is 256 MB */
2192 	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2193 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2194 	if (rc)
2195 		return rc;
2196 
2197 	/* Fetch physical BAR address */
2198 	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2199 	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2200 
2201 	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2202 
2203 	/* Inbound Region 0 - Bar 0 - Point to CFG */
2204 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2205 	inbound_region.bar = SRAM_CFG_BAR_ID;
2206 	inbound_region.offset_in_bar = 0;
2207 	inbound_region.addr = STM_FLASH_BASE_ADDR;
2208 	inbound_region.size = CFG_REGION_SIZE;
2209 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2210 	if (rc)
2211 		return rc;
2212 
2213 	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2214 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2215 	inbound_region.bar = SRAM_CFG_BAR_ID;
2216 	inbound_region.offset_in_bar = CFG_REGION_SIZE;
2217 	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2218 	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2219 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2220 	if (rc)
2221 		return rc;
2222 
2223 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2224 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2225 	inbound_region.bar = DRAM_BAR_ID;
2226 	inbound_region.addr = DRAM_PHYS_BASE;
2227 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2228 	if (rc)
2229 		return rc;
2230 
2231 	/* Outbound Region 0 - Point to Host */
2232 	outbound_region.addr = HOST_PHYS_BASE_0;
2233 	outbound_region.size = HOST_PHYS_SIZE_0;
2234 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2235 
2236 	return rc;
2237 }
2238 
2239 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2240 {
2241 	return RREG32(mmHW_STATE);
2242 }
2243 
2244 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2245 {
2246 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2247 
2248 	/*
2249 	 * check for error condition in which number of binning candidates
2250 	 * is higher than the maximum supported by the driver
2251 	 */
2252 	if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2253 		dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2254 					MAX_CLUSTER_BINNING_FAULTY_TPCS,
2255 					hdev->tpc_binning);
2256 		return -EINVAL;
2257 	}
2258 
2259 	prop->tpc_binning_mask = hdev->tpc_binning;
2260 	prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2261 
2262 	return 0;
2263 }
2264 
2265 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2266 {
2267 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2268 	struct hw_queue_properties *q_props = prop->hw_queues_props;
2269 	u64 tpc_binning_mask;
2270 	u8 subst_idx = 0;
2271 	int i, rc;
2272 
2273 	rc = gaudi2_tpc_binning_init_prop(hdev);
2274 	if (rc)
2275 		return rc;
2276 
2277 	tpc_binning_mask = prop->tpc_binning_mask;
2278 
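	/*
	 * For each faulty TPC in the mask, a substitute engine slot is removed
	 * from the enabled mask and its queues are marked as binned: first
	 * DCORE0 TPC6 (the PCI TPC), then DCORE3 TPC5.
	 */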
2279 	for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2280 		u8 subst_seq, binned, qid_base;
2281 
2282 		if (tpc_binning_mask == 0)
2283 			break;
2284 
2285 		if (subst_idx == 0) {
2286 			subst_seq = TPC_ID_DCORE0_TPC6;
2287 			qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2288 		} else {
2289 			subst_seq = TPC_ID_DCORE3_TPC5;
2290 			qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2291 		}
2292 
2293 
2294 		/* clear bit from mask */
2295 		binned = __ffs(tpc_binning_mask);
2296 		/*
2297 		 * Coverity complains about possible out-of-bound access in
2298 		 * clear_bit
2299 		 */
2300 		if (binned >= TPC_ID_SIZE) {
2301 			dev_err(hdev->dev,
2302 				"Invalid binned TPC (binning mask: %llx)\n",
2303 				tpc_binning_mask);
2304 			return -EINVAL;
2305 		}
2306 		clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2307 
2308 		/* also clear replacing TPC bit from enabled mask */
2309 		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2310 
2311 		/* bin the substitute TPC's queues */
2312 		q_props[qid_base].binned = 1;
2313 		q_props[qid_base + 1].binned = 1;
2314 		q_props[qid_base + 2].binned = 1;
2315 		q_props[qid_base + 3].binned = 1;
2316 
2317 		subst_idx++;
2318 	}
2319 
2320 	return 0;
2321 }
2322 
2323 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2324 {
2325 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2326 	u8 num_faulty;
2327 
2328 	num_faulty = hweight32(hdev->decoder_binning);
2329 
2330 	/*
2331 	 * check for error condition in which number of binning candidates
2332 	 * is higher than the maximum supported by the driver
2333 	 */
2334 	if (num_faulty > MAX_FAULTY_DECODERS) {
2335 		dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2336 						hdev->decoder_binning);
2337 		return -EINVAL;
2338 	}
2339 
2340 	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2341 
2342 	if (prop->decoder_binning_mask)
2343 		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2344 	else
2345 		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2346 
2347 	return 0;
2348 }
2349 
2350 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2351 {
2352 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2353 
2354 	/* check if we should override default binning */
2355 	if (!hdev->dram_binning) {
2356 		prop->dram_binning_mask = 0;
2357 		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2358 		return;
2359 	}
2360 
2361 	/* set DRAM binning constraints */
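	/* with binning, the slot cleared from the enabled mask is always HBM_ID5 */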
2362 	prop->faulty_dram_cluster_map |= hdev->dram_binning;
2363 	prop->dram_binning_mask = hdev->dram_binning;
2364 	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2365 }
2366 
2367 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2368 {
2369 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2370 	struct hw_queue_properties *q_props;
2371 	u8 seq, num_faulty;
2372 
2373 	num_faulty = hweight32(hdev->edma_binning);
2374 
2375 	/*
2376 	 * check for error condition in which number of binning candidates
2377 	 * is higher than the maximum supported by the driver
2378 	 */
2379 	if (num_faulty > MAX_FAULTY_EDMAS) {
2380 		dev_err(hdev->dev,
2381 			"EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2382 			hdev->edma_binning);
2383 		return -EINVAL;
2384 	}
2385 
2386 	if (!hdev->edma_binning) {
2387 		prop->edma_binning_mask = 0;
2388 		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2389 		return 0;
2390 	}
2391 
2392 	seq = __ffs((unsigned long)hdev->edma_binning);
2393 
2394 	/* set binning constraints */
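	/* the substitute EDMA cleared from the enabled mask is DCORE3 instance 1 */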
2395 	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2396 	prop->edma_binning_mask = hdev->edma_binning;
2397 	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2398 
2399 	/* bin substitute EDMA's queue */
2400 	q_props = prop->hw_queues_props;
2401 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2402 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2403 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2404 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2405 
2406 	return 0;
2407 }
2408 
2409 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2410 {
2411 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2412 	u8 num_faulty, seq;
2413 
2414 	/* check if we should override default binning */
2415 	if (!xbar_edge_iso_mask) {
2416 		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2417 		return 0;
2418 	}
2419 
2420 	/*
2421 	 * note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2422 	 * only the FW can set a redundancy value); for the user it will always be 0.
2423 	 */
2424 	num_faulty = hweight32(xbar_edge_iso_mask);
2425 
2426 	/*
2427 	 * check for error condition in which number of binning candidates
2428 	 * is higher than the maximum supported by the driver
2429 	 */
2430 	if (num_faulty > MAX_FAULTY_XBARS) {
2431 		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2432 									MAX_FAULTY_XBARS);
2433 		return -EINVAL;
2434 	}
2435 
2436 	seq = __ffs((unsigned long)xbar_edge_iso_mask);
2437 
2438 	/* set binning constraints */
2439 	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2440 	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2441 
2442 	return 0;
2443 }
2444 
2445 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2446 {
2447 	int rc;
2448 
2449 	/*
2450 	 * mark all clusters as good; each component will "fail" a cluster
2451 	 * based on eFuse/user values.
2452 	 * If more than a single cluster is faulty, the chip is unusable.
2453 	 */
2454 	hdev->asic_prop.faulty_dram_cluster_map = 0;
2455 
2456 	gaudi2_set_dram_binning_masks(hdev);
2457 
2458 	rc = gaudi2_set_edma_binning_masks(hdev);
2459 	if (rc)
2460 		return rc;
2461 
2462 	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2463 	if (rc)
2464 		return rc;
2465 
2466 
2467 	/* always initially set to full mask */
2468 	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2469 
2470 	return 0;
2471 }
2472 
2473 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2474 {
2475 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2476 	int rc;
2477 
2478 	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2479 	if (rc)
2480 		return rc;
2481 
2482 	/* if we have DRAM binning reported by FW we should perform cluster config */
2483 	if (prop->faulty_dram_cluster_map) {
2484 		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2485 
2486 		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2487 	}
2488 
2489 	return 0;
2490 }
2491 
2492 static int gaudi2_set_binning_masks(struct hl_device *hdev)
2493 {
2494 	int rc;
2495 
2496 	rc = gaudi2_set_cluster_binning_masks(hdev);
2497 	if (rc)
2498 		return rc;
2499 
2500 	rc = gaudi2_set_tpc_binning_masks(hdev);
2501 	if (rc)
2502 		return rc;
2503 
2504 	rc = gaudi2_set_dec_binning_masks(hdev);
2505 	if (rc)
2506 		return rc;
2507 
2508 	return 0;
2509 }
2510 
2511 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2512 {
2513 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2514 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2515 	long max_power;
2516 	u64 dram_size;
2517 	int rc;
2518 
2519 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2520 		return 0;
2521 
2522 	/* No point in asking for this information again when not doing a hard reset, as the
2523 	 * device CPU hasn't been reset
2524 	 */
2525 	if (hdev->reset_info.in_compute_reset)
2526 		return 0;
2527 
2528 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2529 										mmCPU_BOOT_ERR1);
2530 	if (rc)
2531 		return rc;
2532 
2533 	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2534 	if (dram_size) {
2535 		/* we can have either 5 or 6 HBMs. other values are invalid */
2536 
2537 		if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2538 					(dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2539 			dev_err(hdev->dev,
2540 				"F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2541 				dram_size, prop->dram_size);
2542 			dram_size = prop->dram_size;
2543 		}
2544 
2545 		prop->dram_size = dram_size;
2546 		prop->dram_end_address = prop->dram_base_address + dram_size;
2547 	}
2548 
2549 	if (!strlen(prop->cpucp_info.card_name))
2550 		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2551 
2552 	/* Overwrite binning masks with the actual binning values from F/W */
2553 	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2554 	hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2555 	hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2556 	hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2557 
2558 	/*
2559 	 * at this point the DRAM parameters need to be updated according to data obtained
2560 	 * from the FW
2561 	 */
2562 	rc = hdev->asic_funcs->set_dram_properties(hdev);
2563 	if (rc)
2564 		return rc;
2565 
2566 	rc = hdev->asic_funcs->set_binning_masks(hdev);
2567 	if (rc)
2568 		return rc;
2569 
2570 	max_power = hl_fw_get_max_power(hdev);
2571 	if (max_power < 0)
2572 		return max_power;
2573 
2574 	prop->max_power_default = (u64) max_power;
2575 
2576 	return 0;
2577 }
2578 
2579 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2580 {
2581 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2582 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2583 	int rc;
2584 
2585 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2586 		return 0;
2587 
2588 	rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2589 	if (rc)
2590 		return rc;
2591 
2592 	hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2593 
2594 	return 0;
2595 }
2596 
2597 static int gaudi2_early_init(struct hl_device *hdev)
2598 {
2599 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2600 	struct pci_dev *pdev = hdev->pdev;
2601 	resource_size_t pci_bar_size;
2602 	int rc;
2603 
2604 	rc = gaudi2_set_fixed_properties(hdev);
2605 	if (rc)
2606 		return rc;
2607 
2608 	/* Check BAR sizes */
2609 	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2610 
2611 	if (pci_bar_size != CFG_BAR_SIZE) {
2612 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2613 			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2614 		rc = -ENODEV;
2615 		goto free_queue_props;
2616 	}
2617 
2618 	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2619 	if (pci_bar_size != MSIX_BAR_SIZE) {
2620 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2621 			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2622 		rc = -ENODEV;
2623 		goto free_queue_props;
2624 	}
2625 
2626 	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2627 	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2628 
2629 	/*
2630 	 * Only in pldm driver config iATU
2631 	 */
2632 	if (hdev->pldm)
2633 		hdev->asic_prop.iatu_done_by_fw = false;
2634 	else
2635 		hdev->asic_prop.iatu_done_by_fw = true;
2636 
2637 	rc = hl_pci_init(hdev);
2638 	if (rc)
2639 		goto free_queue_props;
2640 
	/* Before continuing with the initialization, we need to read the preboot version to
	 * determine whether we are running with security-enabled firmware
	 */
2644 	rc = hl_fw_read_preboot_status(hdev);
2645 	if (rc) {
2646 		if (hdev->reset_on_preboot_fail)
2647 			hdev->asic_funcs->hw_fini(hdev, true, false);
2648 		goto pci_fini;
2649 	}
2650 
2651 	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2652 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2653 		hdev->asic_funcs->hw_fini(hdev, true, false);
2654 	}
2655 
2656 	return 0;
2657 
2658 pci_fini:
2659 	hl_pci_fini(hdev);
2660 free_queue_props:
2661 	kfree(hdev->asic_prop.hw_queues_props);
2662 	return rc;
2663 }
2664 
2665 static int gaudi2_early_fini(struct hl_device *hdev)
2666 {
2667 	kfree(hdev->asic_prop.hw_queues_props);
2668 	hl_pci_fini(hdev);
2669 
2670 	return 0;
2671 }
2672 
2673 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
2674 {
2675 	switch (arc_id) {
2676 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
2677 		return true;
2678 	default:
2679 		return false;
2680 	}
2681 }
2682 
2683 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
2684 {
2685 	switch (arc_id) {
2686 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
2687 		return true;
2688 	default:
2689 		return false;
2690 	}
2691 }
2692 
2693 static void gaudi2_init_arcs(struct hl_device *hdev)
2694 {
2695 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2696 	u64 arc_id;
2697 	u32 i;
2698 
2699 	for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
2700 		if (gaudi2_is_arc_enabled(hdev, i))
2701 			continue;
2702 
2703 		gaudi2_set_arc_id_cap(hdev, i);
2704 	}
2705 
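	/* Walk the engine queues one QMAN at a time (4 PQs per QMAN) and mark the ARC of every
	 * enabled queue, skipping ARCs whose NIC port or TPC engine is not enabled.
	 */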
2706 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
2707 		if (!gaudi2_is_queue_enabled(hdev, i))
2708 			continue;
2709 
2710 		arc_id = gaudi2_queue_id_to_arc_id[i];
2711 		if (gaudi2_is_arc_enabled(hdev, arc_id))
2712 			continue;
2713 
2714 		if (gaudi2_is_arc_nic_owned(arc_id) &&
2715 				!(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
2716 			continue;
2717 
2718 		if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
2719 							BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
2720 			continue;
2721 
2722 		gaudi2_set_arc_id_cap(hdev, arc_id);
2723 	}
2724 }
2725 
2726 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
2727 {
2728 	u32 reg_base, reg_val;
2729 	int rc;
2730 
2731 	switch (cpu_id) {
2732 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
2733 		/* Each ARC scheduler has 2 consecutive DCCM blocks */
2734 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2735 						ARC_DCCM_BLOCK_SIZE * 2, true);
2736 		if (rc)
2737 			return rc;
2738 		break;
2739 	case CPU_ID_SCHED_ARC4:
2740 	case CPU_ID_SCHED_ARC5:
2741 	case CPU_ID_MME_QMAN_ARC0:
2742 	case CPU_ID_MME_QMAN_ARC1:
2743 		reg_base = gaudi2_arc_blocks_bases[cpu_id];
2744 
2745 		/* Scrub lower DCCM block */
2746 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2747 						ARC_DCCM_BLOCK_SIZE, true);
2748 		if (rc)
2749 			return rc;
2750 
2751 		/* Switch to upper DCCM block */
2752 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
2753 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2754 
2755 		/* Scrub upper DCCM block */
2756 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2757 						ARC_DCCM_BLOCK_SIZE, true);
2758 		if (rc)
2759 			return rc;
2760 
2761 		/* Switch to lower DCCM block */
2762 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
2763 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2764 		break;
2765 	default:
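		/* Remaining ARCs have a single DCCM block to scrub */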
2766 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2767 						ARC_DCCM_BLOCK_SIZE, true);
2768 		if (rc)
2769 			return rc;
2770 	}
2771 
2772 	return 0;
2773 }
2774 
2775 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
2776 {
2777 	u16 arc_id;
2778 
2779 	for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
2780 		if (!gaudi2_is_arc_enabled(hdev, arc_id))
2781 			continue;
2782 
2783 		gaudi2_scrub_arc_dccm(hdev, arc_id);
2784 	}
2785 }
2786 
2787 static int gaudi2_late_init(struct hl_device *hdev)
2788 {
2789 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2790 	int rc;
2791 
2792 	hdev->asic_prop.supports_advanced_cpucp_rc = true;
2793 
2794 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
2795 					gaudi2->virt_msix_db_dma_addr);
2796 	if (rc) {
2797 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
2798 		return rc;
2799 	}
2800 
2801 	rc = gaudi2_fetch_psoc_frequency(hdev);
2802 	if (rc) {
2803 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
2804 		goto disable_pci_access;
2805 	}
2806 
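	/* Mark the enabled ARC cores, scrub their DCCM memories and configure the security settings */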
2807 	gaudi2_init_arcs(hdev);
2808 	gaudi2_scrub_arcs_dccm(hdev);
2809 	gaudi2_init_security(hdev);
2810 
2811 	return 0;
2812 
2813 disable_pci_access:
2814 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2815 
2816 	return rc;
2817 }
2818 
2819 static void gaudi2_late_fini(struct hl_device *hdev)
2820 {
2821 	hl_hwmon_release_resources(hdev);
2822 }
2823 
2824 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
2825 {
2826 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2827 
2828 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2829 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2830 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2831 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2832 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2833 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2834 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2835 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2836 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2837 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2838 }
2839 
2840 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
2841 {
2842 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2843 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2844 	u32 block_size, umr_start_idx, num_umr_blocks;
2845 	int i;
2846 
2847 	for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
2848 		if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
2849 			block_size = ARC_DCCM_BLOCK_SIZE * 2;
2850 		else
2851 			block_size = ARC_DCCM_BLOCK_SIZE;
2852 
2853 		blocks[i].address = gaudi2_arc_dccm_bases[i];
2854 		blocks[i].size = block_size;
2855 	}
2856 
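	/* Following the per-ARC DCCM blocks, expose the ACP engine blocks of the ARC farm and
	 * the MME QMAN ARCs
	 */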
2857 	blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
2858 	blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
2859 
2860 	blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
2861 	blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
2862 
2863 	blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
2864 	blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
2865 
2866 	blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
2867 	blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
2868 
2869 	blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
2870 	blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
2871 
2872 	blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
2873 	blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
2874 
2875 	blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
2876 	blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
2877 
2878 	blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
2879 	blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
2880 
2881 	umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
2882 	num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
2883 	for (i = 0 ; i < num_umr_blocks ; i++) {
2884 		u8 nic_id, umr_block_id;
2885 
2886 		nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
2887 		umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
2888 
2889 		blocks[umr_start_idx + i].address =
2890 			mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
2891 			(nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
2892 			(nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
2893 			umr_block_id * NIC_UMR_OFFSET;
2894 		blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
2895 	}
2896 
2897 	/* Expose decoder HW configuration block to user */
2898 	gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
2899 
2900 	for (i = 1; i < NUM_OF_DCORES; ++i) {
2901 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
2902 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
2903 
2904 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
2905 						mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
2906 
2907 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
2908 						mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
2909 	}
2910 }
2911 
2912 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
2913 {
2914 	dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
2915 	void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
2916 	int i, j, rc = 0;
2917 
	/* The device ARC works with 32-bit addresses, and because there is a single HW register
	 * that holds the extension bits (49..28), these bits must be identical across the entire
	 * allocated range.
	 */
2922 
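	/* Allocations whose MSBs differ are kept until the end of the loop so that a subsequent
	 * attempt can land in a different region; all but the chosen allocation are freed below.
	 */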
2923 	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
2924 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
2925 							&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
2926 		if (!virt_addr_arr[i]) {
2927 			rc = -ENOMEM;
2928 			goto free_dma_mem_arr;
2929 		}
2930 
2931 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
2932 		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
2933 			break;
2934 	}
2935 
2936 	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSBs of the ARC-accessible DMA memory are not identical across the entire range\n");
2939 		rc = -EFAULT;
2940 		goto free_dma_mem_arr;
2941 	}
2942 
2943 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
2944 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
2945 
2946 free_dma_mem_arr:
2947 	for (j = 0 ; j < i ; j++)
2948 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
2949 						dma_addr_arr[j]);
2950 
2951 	return rc;
2952 }
2953 
2954 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
2955 {
2956 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2957 	struct pci_mem_region *region;
2958 
2959 	/* CFG */
2960 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
2961 	region->region_base = CFG_BASE;
2962 	region->region_size = CFG_SIZE;
2963 	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
2964 	region->bar_size = CFG_BAR_SIZE;
2965 	region->bar_id = SRAM_CFG_BAR_ID;
2966 	region->used = 1;
2967 
2968 	/* SRAM */
2969 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
2970 	region->region_base = SRAM_BASE_ADDR;
2971 	region->region_size = SRAM_SIZE;
2972 	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
2973 	region->bar_size = CFG_BAR_SIZE;
2974 	region->bar_id = SRAM_CFG_BAR_ID;
2975 	region->used = 1;
2976 
2977 	/* DRAM */
2978 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
2979 	region->region_base = DRAM_PHYS_BASE;
2980 	region->region_size = hdev->asic_prop.dram_size;
2981 	region->offset_in_bar = 0;
2982 	region->bar_size = prop->dram_pci_bar_size;
2983 	region->bar_id = DRAM_BAR_ID;
2984 	region->used = 1;
2985 }
2986 
2987 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
2988 {
2989 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2990 	int i, j, k;
2991 
2992 	/* Initialize common user CQ interrupt */
2993 	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
2994 				HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
2995 
2996 	/* Initialize common decoder interrupt */
2997 	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
2998 				HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);
2999 
	/* The user interrupts structure holds both decoder and user interrupts from various
	 * engines. We first initialize the decoder interrupts and then we add the user interrupts.
	 * The only limitation is that the last decoder interrupt id must be smaller
	 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
	 */
3005 
	/* Initialize the decoder interrupts; expose only the normal interrupts, as the error
	 * interrupts are handled by the driver
	 */
3009 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
3010 										i += 2, j++)
3011 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
3012 						HL_USR_INTERRUPT_DECODER);
3013 
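	/* The user CQ interrupt entries follow the decoder entries, hence 'j' continues from the
	 * previous loop
	 */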
3014 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
3015 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
3016 }
3017 
3018 static inline int gaudi2_get_non_zero_random_int(void)
3019 {
3020 	int rand = get_random_u32();
3021 
3022 	return rand ? rand : 1;
3023 }
3024 
3025 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3026 {
3027 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3028 	struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3029 			&prop->skip_special_blocks_cfg;
3030 
3031 	kfree(prop->special_blocks);
3032 	kfree(skip_special_blocks_cfg->block_types);
3033 	kfree(skip_special_blocks_cfg->block_ranges);
3034 }
3035 
3036 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
3037 {
3038 	gaudi2_special_blocks_free(hdev);
3039 }
3040 
3041 static bool gaudi2_special_block_skip(struct hl_device *hdev,
3042 		struct hl_special_blocks_cfg *special_blocks_cfg,
3043 		u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
3044 {
3045 	return false;
3046 }
3047 
3048 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3049 {
3050 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3051 	int i, rc;
3052 
3053 	/* Configure Special blocks */
3054 	prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
3055 	prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3056 	prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3057 			sizeof(*prop->special_blocks), GFP_KERNEL);
3058 	if (!prop->special_blocks)
3059 		return -ENOMEM;
3060 
3061 	for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3062 		memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3063 				sizeof(*prop->special_blocks));
3064 
3065 	/* Configure when to skip Special blocks */
3066 	memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3067 	prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3068 
3069 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3070 		prop->skip_special_blocks_cfg.block_types =
3071 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3072 					sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3073 		if (!prop->skip_special_blocks_cfg.block_types) {
3074 			rc = -ENOMEM;
3075 			goto free_special_blocks;
3076 		}
3077 
3078 		memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3079 				sizeof(gaudi2_iterator_skip_block_types));
3080 
3081 		prop->skip_special_blocks_cfg.block_types_len =
3082 					ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3083 	}
3084 
3085 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3086 		prop->skip_special_blocks_cfg.block_ranges =
3087 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3088 					sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3089 		if (!prop->skip_special_blocks_cfg.block_ranges) {
3090 			rc = -ENOMEM;
3091 			goto free_skip_special_blocks_types;
3092 		}
3093 
3094 		for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3095 			memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3096 					&gaudi2_iterator_skip_block_ranges[i],
3097 					sizeof(struct range));
3098 
3099 		prop->skip_special_blocks_cfg.block_ranges_len =
3100 					ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3101 	}
3102 
3103 	return 0;
3104 
3105 free_skip_special_blocks_types:
3106 	kfree(prop->skip_special_blocks_cfg.block_types);
3107 free_special_blocks:
3108 	kfree(prop->special_blocks);
3109 
3110 	return rc;
3111 }
3112 
3113 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
3114 {
3115 	return gaudi2_special_blocks_config(hdev);
3116 }
3117 
3118 static int gaudi2_sw_init(struct hl_device *hdev)
3119 {
3120 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3121 	struct gaudi2_device *gaudi2;
3122 	int i, rc;
3123 
3124 	/* Allocate device structure */
3125 	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3126 	if (!gaudi2)
3127 		return -ENOMEM;
3128 
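	/* Build the H/W events array from all valid, non-message entries of the IRQ map table */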
3129 	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3130 		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3131 			continue;
3132 
3133 		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3134 			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3135 				GAUDI2_EVENT_SIZE);
3136 			rc = -EINVAL;
3137 			goto free_gaudi2_device;
3138 		}
3139 
3140 		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3141 	}
3142 
3143 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3144 		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3145 
3146 	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3147 
3148 	hdev->asic_specific = gaudi2;
3149 
3150 	/* Create DMA pool for small allocations.
3151 	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3152 	 * PI/CI registers allocated from this pool have this restriction
3153 	 */
3154 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3155 					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3156 	if (!hdev->dma_pool) {
3157 		dev_err(hdev->dev, "failed to create DMA pool\n");
3158 		rc = -ENOMEM;
3159 		goto free_gaudi2_device;
3160 	}
3161 
3162 	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3163 	if (rc)
3164 		goto free_dma_pool;
3165 
3166 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3167 	if (!hdev->cpu_accessible_dma_pool) {
3168 		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3169 		rc = -ENOMEM;
3170 		goto free_cpu_dma_mem;
3171 	}
3172 
3173 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3174 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3175 	if (rc) {
3176 		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3177 		rc = -EFAULT;
3178 		goto free_cpu_accessible_dma_pool;
3179 	}
3180 
3181 	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3182 								&gaudi2->virt_msix_db_dma_addr);
3183 	if (!gaudi2->virt_msix_db_cpu_addr) {
3184 		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3185 		rc = -ENOMEM;
3186 		goto free_cpu_accessible_dma_pool;
3187 	}
3188 
3189 	spin_lock_init(&gaudi2->hw_queues_lock);
3190 
3191 	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3192 							&gaudi2->scratchpad_bus_address,
3193 							GFP_KERNEL | __GFP_ZERO);
3194 	if (!gaudi2->scratchpad_kernel_address) {
3195 		rc = -ENOMEM;
3196 		goto free_virt_msix_db_mem;
3197 	}
3198 
3199 	gaudi2_user_mapped_blocks_init(hdev);
3200 
3201 	/* Initialize user interrupts */
3202 	gaudi2_user_interrupt_setup(hdev);
3203 
3204 	hdev->supports_coresight = true;
3205 	hdev->supports_sync_stream = true;
3206 	hdev->supports_cb_mapping = true;
3207 	hdev->supports_wait_for_multi_cs = false;
3208 
3209 	prop->supports_compute_reset = true;
3210 
3211 	hdev->asic_funcs->set_pci_memory_regions(hdev);
3212 
3213 	rc = gaudi2_special_blocks_iterator_config(hdev);
3214 	if (rc)
3215 		goto free_scratchpad_mem;
3216 
3217 	return 0;
3218 
3219 free_scratchpad_mem:
	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
					gaudi2->scratchpad_bus_address);
3222 free_virt_msix_db_mem:
3223 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3224 free_cpu_accessible_dma_pool:
3225 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3226 free_cpu_dma_mem:
3227 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3228 					hdev->cpu_accessible_dma_address);
3229 free_dma_pool:
3230 	dma_pool_destroy(hdev->dma_pool);
3231 free_gaudi2_device:
3232 	kfree(gaudi2);
3233 	return rc;
3234 }
3235 
3236 static int gaudi2_sw_fini(struct hl_device *hdev)
3237 {
3238 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3239 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3240 
3241 	gaudi2_special_blocks_iterator_free(hdev);
3242 
3243 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3244 
3245 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3246 
3247 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3248 						hdev->cpu_accessible_dma_address);
3249 
3250 	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3251 					gaudi2->scratchpad_bus_address);
3252 
3253 	dma_pool_destroy(hdev->dma_pool);
3254 
3255 	kfree(gaudi2);
3256 
3257 	return 0;
3258 }
3259 
3260 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3261 {
3262 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3263 						QM_GLBL_CFG1_CQF_STOP |
3264 						QM_GLBL_CFG1_CP_STOP);
3265 
	/* Also stop the ARC */
3267 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3268 }
3269 
3270 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3271 {
3272 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3273 						QM_GLBL_CFG1_CQF_FLUSH |
3274 						QM_GLBL_CFG1_CP_FLUSH);
3275 }
3276 
3277 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3278 {
3279 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3280 }
3281 
3282 /**
3283  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3284  *
3285  * @hdev: pointer to the habanalabs device structure
3286  * @queue_id: queue to clear fence counters to
3287  * @skip_fence: if true set maximum fence value to all fence counters to avoid
3288  *              getting stuck on any fence value. otherwise set all fence
3289  *              counters to 0 (standard clear of fence counters)
3290  */
3291 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3292 						bool skip_fence)
3293 {
3294 	u32 size, reg_base;
3295 	u32 addr, val;
3296 
3297 	reg_base = gaudi2_qm_blocks_bases[queue_id];
3298 
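	/* The CP fence counter registers are contiguous from FENCE0_CNT_0 up to BARRIER_CFG, so a
	 * single LBW memset covers all of them
	 */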
3299 	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3300 	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3301 
3302 	/*
3303 	 * in case we want to make sure that QM that is stuck on a fence will
3304 	 * be released we should set the fence counter to a higher value that
3305 	 * the value the QM waiting for. to comply with any fence counter of
3306 	 * any value we set maximum fence value to all counters
3307 	 */
3308 	val = skip_fence ? U32_MAX : 0;
3309 	gaudi2_memset_device_lbw(hdev, addr, size, val);
3310 }
3311 
3312 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3313 {
3314 	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3315 
3316 	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3317 	gaudi2_flush_qman_common(hdev, reg_base);
3318 	gaudi2_flush_qman_arc_common(hdev, reg_base);
3319 }
3320 
3321 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3322 {
3323 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3324 	int dcore, inst;
3325 
3326 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3327 		goto stop_edma_qmans;
3328 
3329 	/* Stop CPs of PDMA QMANs */
3330 	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3331 	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3332 
3333 stop_edma_qmans:
3334 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3335 		return;
3336 
3337 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3338 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3339 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3340 			u32 qm_base;
3341 
3342 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3343 				continue;
3344 
3345 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3346 					inst * DCORE_EDMA_OFFSET;
3347 
3348 			/* Stop CPs of EDMA QMANs */
3349 			gaudi2_stop_qman_common(hdev, qm_base);
3350 		}
3351 	}
3352 }
3353 
3354 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3355 {
3356 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3357 	u32 offset, i;
3358 
3359 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3360 
3361 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3362 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3363 			continue;
3364 
3365 		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3366 	}
3367 }
3368 
3369 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3370 {
3371 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3372 	u32 reg_base;
3373 	int i;
3374 
3375 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3376 		return;
3377 
3378 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3379 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3380 			continue;
3381 
3382 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3383 		gaudi2_stop_qman_common(hdev, reg_base);
3384 	}
3385 }
3386 
3387 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3388 {
3389 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3390 	u32 reg_base;
3391 	int i;
3392 
3393 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3394 		return;
3395 
3396 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3397 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3398 			continue;
3399 
3400 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3401 		gaudi2_stop_qman_common(hdev, reg_base);
3402 	}
3403 }
3404 
3405 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3406 {
3407 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3408 	u32 reg_base, queue_id;
3409 	int i;
3410 
3411 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3412 		return;
3413 
3414 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3415 
3416 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3417 		if (!(hdev->nic_ports_mask & BIT(i)))
3418 			continue;
3419 
3420 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3421 		gaudi2_stop_qman_common(hdev, reg_base);
3422 	}
3423 }
3424 
3425 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3426 {
3427 	u32 reg_val;
3428 
3429 	reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3430 	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3431 }
3432 
3433 static void gaudi2_dma_stall(struct hl_device *hdev)
3434 {
3435 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3436 	int dcore, inst;
3437 
3438 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3439 		goto stall_edma;
3440 
3441 	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3442 	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3443 
3444 stall_edma:
3445 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3446 		return;
3447 
3448 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3449 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3450 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3451 			u32 core_base;
3452 
3453 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3454 				continue;
3455 
3456 			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3457 					inst * DCORE_EDMA_OFFSET;
3458 
			/* Stall the EDMA cores */
3460 			gaudi2_stall_dma_common(hdev, core_base);
3461 		}
3462 	}
3463 }
3464 
3465 static void gaudi2_mme_stall(struct hl_device *hdev)
3466 {
3467 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3468 	u32 offset, i;
3469 
3470 	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3471 
3472 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3473 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3474 			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3475 }
3476 
3477 static void gaudi2_tpc_stall(struct hl_device *hdev)
3478 {
3479 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3480 	u32 reg_base;
3481 	int i;
3482 
3483 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3484 		return;
3485 
3486 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3487 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3488 			continue;
3489 
3490 		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3491 		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3492 	}
3493 }
3494 
3495 static void gaudi2_rotator_stall(struct hl_device *hdev)
3496 {
3497 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3498 	u32 reg_val;
3499 	int i;
3500 
3501 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3502 		return;
3503 
3504 	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3505 			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3506 			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3507 
3508 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3509 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3510 			continue;
3511 
3512 		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3513 	}
3514 }
3515 
3516 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3517 {
3518 	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3519 }
3520 
3521 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3522 {
3523 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3524 	int dcore, inst;
3525 
3526 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3527 		goto stop_edma_qmans;
3528 
3529 	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3530 	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3531 
3532 stop_edma_qmans:
3533 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3534 		return;
3535 
3536 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3537 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3538 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3539 			u32 qm_base;
3540 
3541 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3542 				continue;
3543 
3544 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3545 					inst * DCORE_EDMA_OFFSET;
3546 
3547 			/* Disable CPs of EDMA QMANs */
3548 			gaudi2_disable_qman_common(hdev, qm_base);
3549 		}
3550 	}
3551 }
3552 
3553 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3554 {
3555 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3556 	u32 offset, i;
3557 
3558 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3559 
3560 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3561 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3562 			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3563 }
3564 
3565 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3566 {
3567 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3568 	u32 reg_base;
3569 	int i;
3570 
3571 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3572 		return;
3573 
3574 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3575 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3576 			continue;
3577 
3578 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3579 		gaudi2_disable_qman_common(hdev, reg_base);
3580 	}
3581 }
3582 
3583 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
3584 {
3585 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3586 	u32 reg_base;
3587 	int i;
3588 
3589 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3590 		return;
3591 
3592 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3593 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3594 			continue;
3595 
3596 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3597 		gaudi2_disable_qman_common(hdev, reg_base);
3598 	}
3599 }
3600 
3601 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
3602 {
3603 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3604 	u32 reg_base, queue_id;
3605 	int i;
3606 
3607 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3608 		return;
3609 
3610 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3611 
3612 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3613 		if (!(hdev->nic_ports_mask & BIT(i)))
3614 			continue;
3615 
3616 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3617 		gaudi2_disable_qman_common(hdev, reg_base);
3618 	}
3619 }
3620 
3621 static void gaudi2_enable_timestamp(struct hl_device *hdev)
3622 {
3623 	/* Disable the timestamp counter */
3624 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3625 
3626 	/* Zero the lower/upper parts of the 64-bit counter */
3627 	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
3628 	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
3629 
3630 	/* Enable the counter */
3631 	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
3632 }
3633 
3634 static void gaudi2_disable_timestamp(struct hl_device *hdev)
3635 {
3636 	/* Disable the timestamp counter */
3637 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3638 }
3639 
3640 static const char *gaudi2_irq_name(u16 irq_number)
3641 {
3642 	switch (irq_number) {
3643 	case GAUDI2_IRQ_NUM_EVENT_QUEUE:
3644 		return "gaudi2 cpu eq";
3645 	case GAUDI2_IRQ_NUM_COMPLETION:
3646 		return "gaudi2 completion";
3647 	case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
3648 		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
3649 	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
3650 		return "gaudi2 user completion";
3651 	default:
3652 		return "invalid";
3653 	}
3654 }
3655 
3656 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
3657 {
3658 	int i, irq, relative_idx;
3659 	struct hl_dec *dec;
3660 
3661 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
3662 		irq = pci_irq_vector(hdev->pdev, i);
3663 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3664 
3665 		dec = hdev->dec + relative_idx / 2;
3666 
3667 		/* We pass different structures depending on the irq handler. For the abnormal
3668 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3669 		 * user_interrupt entry
3670 		 */
3671 		free_irq(irq, ((relative_idx % 2) ?
3672 				(void *) dec :
3673 				(void *) &hdev->user_interrupt[dec->core_id]));
3674 	}
3675 }
3676 
3677 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
3678 {
3679 	int rc, i, irq_init_cnt, irq, relative_idx;
3680 	irq_handler_t irq_handler;
3681 	struct hl_dec *dec;
3682 
3683 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
3684 			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
3685 			i++, irq_init_cnt++) {
3686 
3687 		irq = pci_irq_vector(hdev->pdev, i);
3688 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3689 
3690 		irq_handler = (relative_idx % 2) ?
3691 				hl_irq_handler_dec_abnrm :
3692 				hl_irq_handler_user_interrupt;
3693 
3694 		dec = hdev->dec + relative_idx / 2;
3695 
3696 		/* We pass different structures depending on the irq handler. For the abnormal
3697 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3698 		 * user_interrupt entry
3699 		 */
3700 		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i),
3701 				((relative_idx % 2) ?
3702 				(void *) dec :
3703 				(void *) &hdev->user_interrupt[dec->core_id]));
3704 		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
3706 			goto free_dec_irqs;
3707 		}
3708 	}
3709 
3710 	return 0;
3711 
3712 free_dec_irqs:
3713 	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
3714 	return rc;
3715 }
3716 
3717 static int gaudi2_enable_msix(struct hl_device *hdev)
3718 {
3719 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3720 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3721 	int rc, irq, i, j, user_irq_init_cnt;
3722 	irq_handler_t irq_handler;
3723 	struct hl_cq *cq;
3724 
3725 	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
3726 		return 0;
3727 
3728 	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
3729 					PCI_IRQ_MSIX);
3730 	if (rc < 0) {
3731 		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
3732 			GAUDI2_MSIX_ENTRIES, rc);
3733 		return rc;
3734 	}
3735 
3736 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3737 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3738 	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
3739 	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
3741 		goto free_irq_vectors;
3742 	}
3743 
3744 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3745 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
3746 			&hdev->event_queue);
3747 	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
3749 		goto free_completion_irq;
3750 	}
3751 
3752 	rc = gaudi2_dec_enable_msix(hdev);
3753 	if (rc) {
		dev_err(hdev->dev, "Failed to enable decoder IRQ\n");
3755 		goto free_event_irq;
3756 	}
3757 
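	/* Request the user interrupts. 'i' is the MSI-X vector index while 'j' indexes the
	 * user_interrupt array, starting right after the decoder entries.
	 */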
3758 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
3759 			user_irq_init_cnt < prop->user_interrupt_count;
3760 			i++, j++, user_irq_init_cnt++) {
3761 
3762 		irq = pci_irq_vector(hdev->pdev, i);
3763 		irq_handler = hl_irq_handler_user_interrupt;
3764 
3765 		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]);
3766 		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
3768 			goto free_user_irq;
3769 		}
3770 	}
3771 
3772 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
3773 
3774 	return 0;
3775 
3776 free_user_irq:
3777 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
3778 			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
3779 
3780 		irq = pci_irq_vector(hdev->pdev, i);
3781 		free_irq(irq, &hdev->user_interrupt[j]);
3782 	}
3783 
3784 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3785 
3786 free_event_irq:
3787 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
	free_irq(irq, &hdev->event_queue);
3789 
3790 free_completion_irq:
3791 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3792 	free_irq(irq, cq);
3793 
3794 free_irq_vectors:
3795 	pci_free_irq_vectors(hdev->pdev);
3796 
3797 	return rc;
3798 }
3799 
3800 static void gaudi2_sync_irqs(struct hl_device *hdev)
3801 {
3802 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3803 	int i, j;
3804 	int irq;
3805 
3806 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3807 		return;
3808 
3809 	/* Wait for all pending IRQs to be finished */
3810 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
3811 
3812 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
3813 		irq = pci_irq_vector(hdev->pdev, i);
3814 		synchronize_irq(irq);
3815 	}
3816 
3817 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
3818 										i++, j++) {
3819 		irq = pci_irq_vector(hdev->pdev, i);
3820 		synchronize_irq(irq);
3821 	}
3822 
3823 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
3824 }
3825 
3826 static void gaudi2_disable_msix(struct hl_device *hdev)
3827 {
3828 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3829 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3830 	struct hl_cq *cq;
3831 	int irq, i, j, k;
3832 
3833 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3834 		return;
3835 
3836 	gaudi2_sync_irqs(hdev);
3837 
3838 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3839 	free_irq(irq, &hdev->event_queue);
3840 
3841 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3842 
3843 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
3844 			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
3845 
3846 		irq = pci_irq_vector(hdev->pdev, i);
3847 		free_irq(irq, &hdev->user_interrupt[j]);
3848 	}
3849 
3850 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3851 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3852 	free_irq(irq, cq);
3853 
3854 	pci_free_irq_vectors(hdev->pdev);
3855 
3856 	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
3857 }
3858 
3859 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
3860 {
3861 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3862 	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3863 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3864 	int rc;
3865 
3866 	if (hdev->pldm)
3867 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3868 	else
3869 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3870 
3871 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3872 		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
3873 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3874 			continue;
3875 
3876 		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
3877 
3878 		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
3879 
3880 		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3881 
		/* Wait until all traffic from the decoder stops
		 * before applying core reset.
		 */
3885 		rc = hl_poll_timeout(
3886 				hdev,
3887 				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3888 				graceful,
3889 				(graceful & graceful_pend_mask),
3890 				100,
3891 				timeout_usec);
3892 		if (rc)
3893 			dev_err(hdev->dev,
3894 				"Failed to stop traffic from DCORE%d Decoder %d\n",
3895 				dcore_id, dec_id);
3896 	}
3897 }
3898 
3899 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
3900 {
3901 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3902 	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3903 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3904 	int rc;
3905 
3906 	if (hdev->pldm)
3907 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3908 	else
3909 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3910 
3911 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3912 		dec_bit = PCIE_DEC_SHIFT + dec_id;
3913 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3914 			continue;
3915 
3916 		offset = dec_id * PCIE_VDEC_OFFSET;
3917 
3918 		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
3919 
3920 		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3921 
		/* Wait until all traffic from the decoder stops
		 * before applying core reset.
		 */
3925 		rc = hl_poll_timeout(
3926 				hdev,
3927 				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3928 				graceful,
3929 				(graceful & graceful_pend_mask),
3930 				100,
3931 				timeout_usec);
3932 		if (rc)
3933 			dev_err(hdev->dev,
3934 				"Failed to stop traffic from PCIe Decoder %d\n",
3935 				dec_id);
3936 	}
3937 }
3938 
3939 static void gaudi2_stop_dec(struct hl_device *hdev)
3940 {
3941 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3942 	int dcore_id;
3943 
3944 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
3945 		return;
3946 
3947 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
3948 		gaudi2_stop_dcore_dec(hdev, dcore_id);
3949 
3950 	gaudi2_stop_pcie_dec(hdev);
3951 }
3952 
3953 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3954 {
3955 	u32 reg_base, reg_val;
3956 
3957 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
3958 	if (run_mode == HL_ENGINE_CORE_RUN)
3959 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
3960 	else
3961 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
3962 
3963 	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
3964 }
3965 
3966 static void gaudi2_halt_arcs(struct hl_device *hdev)
3967 {
3968 	u16 arc_id;
3969 
3970 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
3971 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3972 			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
3973 	}
3974 }
3975 
3976 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3977 {
3978 	int rc;
3979 	u32 reg_base, val, ack_mask, timeout_usec = 100000;
3980 
3981 	if (hdev->pldm)
3982 		timeout_usec *= 100;
3983 
3984 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
3985 	if (run_mode == HL_ENGINE_CORE_RUN)
3986 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
3987 	else
3988 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
3989 
3990 	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
3991 				val, ((val & ack_mask) == ack_mask),
3992 				1000, timeout_usec);
3993 
3994 	if (!rc) {
3995 		/* Clear */
3996 		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
3997 		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
3998 	}
3999 
4000 	return rc;
4001 }
4002 
4003 static void gaudi2_reset_arcs(struct hl_device *hdev)
4004 {
4005 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4006 	u16 arc_id;
4007 
4008 	if (!gaudi2)
4009 		return;
4010 
4011 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4012 		if (gaudi2_is_arc_enabled(hdev, arc_id))
4013 			gaudi2_clr_arc_id_cap(hdev, arc_id);
4014 }
4015 
4016 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4017 {
4018 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4019 	u32 queue_id;
4020 	int i;
4021 
4022 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4023 		return;
4024 
4025 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4026 
4027 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4028 		if (!(hdev->nic_ports_mask & BIT(i)))
4029 			continue;
4030 
4031 		gaudi2_qman_manual_flush_common(hdev, queue_id);
4032 	}
4033 }
4034 
4035 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
4036 					u32 num_cores, u32 core_command)
4037 {
	int i, rc;

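	/* First issue the run/halt request to all requested cores, then verify that each core
	 * acknowledged the new running mode
	 */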
4041 	for (i = 0 ; i < num_cores ; i++) {
4042 		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
4043 			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
4044 	}
4045 
4046 	for (i = 0 ; i < num_cores ; i++) {
4047 		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
4048 			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
4049 
4050 			if (rc) {
4051 				dev_err(hdev->dev, "failed to %s arc: %d\n",
4052 					(core_command == HL_ENGINE_CORE_HALT) ?
4053 					"HALT" : "RUN", core_ids[i]);
4054 				return -1;
4055 			}
4056 		}
4057 	}
4058 
4059 	return 0;
4060 }
4061 
4062 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4063 {
4064 	u32 wait_timeout_ms;
4065 
4066 	if (hdev->pldm)
4067 		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
4068 	else
4069 		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
4070 
4071 	if (fw_reset)
4072 		goto skip_engines;
4073 
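	/* First stop the QMANs so that no new work is dispatched, then stall the engines
	 * themselves before disabling them
	 */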
4074 	gaudi2_stop_dma_qmans(hdev);
4075 	gaudi2_stop_mme_qmans(hdev);
4076 	gaudi2_stop_tpc_qmans(hdev);
4077 	gaudi2_stop_rot_qmans(hdev);
4078 	gaudi2_stop_nic_qmans(hdev);
4079 	msleep(wait_timeout_ms);
4080 
4081 	gaudi2_halt_arcs(hdev);
4082 	gaudi2_dma_stall(hdev);
4083 	gaudi2_mme_stall(hdev);
4084 	gaudi2_tpc_stall(hdev);
4085 	gaudi2_rotator_stall(hdev);
4086 
4087 	msleep(wait_timeout_ms);
4088 
4089 	gaudi2_stop_dec(hdev);
4090 
4091 	/*
4092 	 * in case of soft reset do a manual flush for QMANs (currently called
4093 	 * only for NIC QMANs
4094 	 */
4095 	if (!hard_reset)
4096 		gaudi2_nic_qmans_manual_flush(hdev);
4097 
4098 	gaudi2_disable_dma_qmans(hdev);
4099 	gaudi2_disable_mme_qmans(hdev);
4100 	gaudi2_disable_tpc_qmans(hdev);
4101 	gaudi2_disable_rot_qmans(hdev);
4102 	gaudi2_disable_nic_qmans(hdev);
4103 	gaudi2_disable_timestamp(hdev);
4104 
4105 skip_engines:
4106 	if (hard_reset) {
4107 		gaudi2_disable_msix(hdev);
4108 		return;
4109 	}
4110 
4111 	gaudi2_sync_irqs(hdev);
4112 }
4113 
4114 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
4115 {
4116 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
4117 
4118 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4119 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
4120 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
4121 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
4122 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
4123 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
4124 }
4125 
4126 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
4127 {
4128 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4129 	struct dynamic_fw_load_mgr *dynamic_loader;
4130 	struct cpu_dyn_regs *dyn_regs;
4131 
4132 	/* fill common fields */
4133 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4134 	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
4135 	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
4136 	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
4137 	fw_loader->skip_bmc = false;
4138 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
4139 	fw_loader->dram_bar_id = DRAM_BAR_ID;
4140 	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
4141 
	/* Here we update the initial values of a few specific dynamic regs (as
	 * before reading the first descriptor from the FW, those values have to
	 * be hard-coded). In later stages of the protocol, those values will be
	 * updated automatically by reading the FW descriptor, so the data there
	 * will always be up-to-date.
	 */
4148 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
4149 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
4150 	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
4151 	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
4152 	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
4153 }
4154 
4155 static int gaudi2_init_cpu(struct hl_device *hdev)
4156 {
4157 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4158 	int rc;
4159 
4160 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4161 		return 0;
4162 
4163 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4164 		return 0;
4165 
4166 	rc = hl_fw_init_cpu(hdev);
4167 	if (rc)
4168 		return rc;
4169 
4170 	gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4171 
4172 	return 0;
4173 }
4174 
4175 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4176 {
4177 	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4178 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4179 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4180 	struct cpu_dyn_regs *dyn_regs;
4181 	struct hl_eq *eq;
4182 	u32 status;
4183 	int err;
4184 
4185 	if (!hdev->cpu_queues_enable)
4186 		return 0;
4187 
4188 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4189 		return 0;
4190 
4191 	eq = &hdev->event_queue;
4192 
4193 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4194 
4195 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4196 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4197 
4198 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4199 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4200 
4201 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4202 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4203 
4204 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4205 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4206 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4207 
4208 	/* Used for EQ CI */
4209 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4210 
4211 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4212 
4213 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4214 
	/* Let the ARC know we are ready, as it is now handling those queues */
4216 
4217 	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4218 		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4219 
4220 	err = hl_poll_timeout(
4221 		hdev,
4222 		mmCPU_IF_QUEUE_INIT,
4223 		status,
4224 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4225 		1000,
4226 		cpu_timeout);
4227 
4228 	if (err) {
4229 		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4230 		return -EIO;
4231 	}
4232 
4233 	/* update FW application security bits */
4234 	if (prop->fw_cpu_boot_dev_sts0_valid)
4235 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4236 
4237 	if (prop->fw_cpu_boot_dev_sts1_valid)
4238 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4239 
4240 	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4241 	return 0;
4242 }
4243 
4244 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4245 				u32 queue_id_base)
4246 {
4247 	struct hl_hw_queue *q;
4248 	u32 pq_id, pq_offset;
4249 
4250 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4251 		q = &hdev->kernel_queues[queue_id_base + pq_id];
4252 		pq_offset = pq_id * 4;
4253 
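		/* Set the PQ base address and size (log2), and reset its PI and CI */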
4254 		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4255 				lower_32_bits(q->bus_address));
4256 		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4257 				upper_32_bits(q->bus_address));
4258 		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4259 		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4260 		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4261 	}
4262 }
4263 
4264 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4265 {
4266 	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4267 
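	/* Point the CP message base registers at the DCORE0 sync manager:
	 * base0 at the monitor payload address registers, base1 at the sync objects
	 */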
4268 	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4269 	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4270 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4271 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4272 
4273 	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4274 		cp_offset = cp_id * 4;
4275 
4276 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
4280 	}
4281 
4282 	/* allow QMANs to accept work from ARC CQF */
4283 	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4284 }
4285 
4286 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4287 				u32 queue_id_base)
4288 {
4289 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4290 	u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4291 
4292 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4293 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4294 
4295 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4296 		pq_offset = pq_id * 4;
4297 
		/* The QMAN HBW completion write is not needed - route it to the scratchpad */
4299 		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4300 				lower_32_bits(gaudi2->scratchpad_bus_address));
4301 		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4302 				upper_32_bits(gaudi2->scratchpad_bus_address));
4303 		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4304 				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4305 
4306 		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
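		/* Route the LBW completion write to a sync object */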
4307 		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4308 		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4309 		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4310 	}
4311 
4312 	/* Enable QMAN H/W completion */
4313 	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4314 }
4315 
4316 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4317 {
4318 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4319 	u32 sp_reg_addr;
4320 
4321 	switch (queue_id_base) {
4322 	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4323 		fallthrough;
4324 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4325 		fallthrough;
4326 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4327 		fallthrough;
4328 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4329 		fallthrough;
4330 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4331 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4332 		break;
4333 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4334 		fallthrough;
4335 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4336 		fallthrough;
4337 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4338 		fallthrough;
4339 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4340 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4341 		break;
4342 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4343 		fallthrough;
4344 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4345 		fallthrough;
4346 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4347 		fallthrough;
4348 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4349 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4350 		break;
4351 	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4352 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4353 		break;
4354 	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4355 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4356 		break;
4357 	default:
4358 		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4359 		return 0;
4360 	}
4361 
4362 	return sp_reg_addr;
4363 }
4364 
4365 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4366 					u32 queue_id_base)
4367 {
4368 	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4369 	int map_table_entry;
4370 
4371 	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4372 
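	/* On a QMAN error, the QMAN writes the matching event CPU ID to the
	 * relevant GIC IRQ control register
	 */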
4373 	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4374 	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4375 	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4376 
4377 	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4378 	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4379 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4380 
4381 	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4382 
4383 	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4384 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4385 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4386 
4387 	/* Enable the QMAN channel.
	 * PDMA QMAN configuration is different, as we do not allow the user to
4389 	 * access some of the CPs.
4390 	 * PDMA0: CP2/3 are reserved for the ARC usage.
4391 	 * PDMA1: CP1/2/3 are reserved for the ARC usage.
4392 	 */
4393 	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4394 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4395 	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4396 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4397 	else
4398 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4399 }
4400 
4401 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4402 		u32 queue_id_base)
4403 {
4404 	u32 pq_id;
4405 
4406 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
4407 		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
4408 
4409 	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
4410 	gaudi2_init_qman_cp(hdev, reg_base);
4411 	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
4412 	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
4413 }
4414 
4415 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
4416 				u32 dma_core_id, bool is_secure)
4417 {
4418 	u32 prot, irq_handler_offset;
4419 	struct cpu_dyn_regs *dyn_regs;
4420 	int map_table_entry;
4421 
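	/* The protection error bit is always set; the protection (secured) bit is
	 * set only when requested (KDMA)
	 */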
4422 	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
4423 	if (is_secure)
4424 		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
4425 
4426 	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
4427 
4428 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4429 	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
4430 
4431 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
4432 			lower_32_bits(CFG_BASE + irq_handler_offset));
4433 
4434 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
4435 			upper_32_bits(CFG_BASE + irq_handler_offset));
4436 
4437 	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
4438 	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
4439 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4440 
4441 	/* Enable the DMA channel */
4442 	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
4443 }
4444 
4445 static void gaudi2_init_kdma(struct hl_device *hdev)
4446 {
4447 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4448 	u32 reg_base;
4449 
4450 	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
4451 		return;
4452 
4453 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
4454 
4455 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
4456 
4457 	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
4458 }
4459 
4460 static void gaudi2_init_pdma(struct hl_device *hdev)
4461 {
4462 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4463 	u32 reg_base;
4464 
4465 	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
4466 		return;
4467 
4468 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
4469 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
4470 
4471 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
4472 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
4473 
4474 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
4475 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
4476 
4477 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
4478 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
4479 
4480 	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
4481 }
4482 
4483 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
4484 {
4485 	u32 reg_base, base_edma_core_id, base_edma_qman_id;
4486 
4487 	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
4488 	base_edma_qman_id = edma_stream_base[seq];
4489 
4490 	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
4491 	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
4492 
4493 	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
4494 	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
4495 }
4496 
4497 static void gaudi2_init_edma(struct hl_device *hdev)
4498 {
4499 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4500 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4501 	int dcore, inst;
4502 
4503 	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
4504 		return;
4505 
4506 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4507 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4508 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4509 
4510 			if (!(prop->edma_enabled_mask & BIT(seq)))
4511 				continue;
4512 
4513 			gaudi2_init_edma_instance(hdev, seq);
4514 
4515 			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
4516 		}
4517 	}
4518 }
4519 
4520 /*
4521  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
4522  * @hdev: pointer to habanalabs device structure.
4523  * @sob_id: sync object ID.
4524  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
4525  * @interrupt_id: interrupt ID.
4526  *
4527  * Some initiators cannot have HBW address in their completion address registers, and thus cannot
4528  * write directly to the HBW host memory of the virtual MSI-X doorbell.
4529  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
4530  *
4531  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
 * In addition to the HBW write, the other 2 messages prepare the monitor for the next
 * completion, by decrementing the sync object value and re-arming the monitor.
4534  */
4535 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
4536 							u32 first_mon_id, u32 interrupt_id)
4537 {
4538 	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
4539 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4540 	u64 addr;
4541 	u8 mask;
4542 
4543 	/* Reset the SOB value */
4544 	sob_offset = sob_id * sizeof(u32);
4545 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
4546 
4547 	/* Configure 3 monitors:
4548 	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
4549 	 * 2. Decrement SOB value by 1.
4550 	 * 3. Re-arm the master monitor.
4551 	 */
4552 
4553 	first_mon_offset = first_mon_id * sizeof(u32);
4554 
4555 	/* 2nd monitor: Decrement SOB value by 1 */
4556 	mon_offset = first_mon_offset + sizeof(u32);
4557 
4558 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
4559 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4560 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4561 
4562 	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
4563 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
4564 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
4565 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4566 
4567 	/* 3rd monitor: Re-arm the master monitor */
4568 	mon_offset = first_mon_offset + 2 * sizeof(u32);
4569 
4570 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
4571 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4572 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4573 
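	/* Arm data: the SOB group, a mask selecting our SOB within the group of 8,
	 * the compare mode and the target value (1)
	 */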
4574 	sob_group = sob_id / 8;
4575 	mask = ~BIT(sob_id & 0x7);
4576 	mode = 0; /* comparison mode is "greater than or equal to" */
4577 	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
4578 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
4579 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
4580 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
4581 
4582 	payload = arm;
4583 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4584 
4585 	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
4586 	mon_offset = first_mon_offset;
4587 
4588 	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
4589 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
4590 
4591 	addr = gaudi2->virt_msix_db_dma_addr;
4592 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4593 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4594 
4595 	payload = interrupt_id;
4596 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4597 
4598 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
4599 }
4600 
4601 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
4602 {
4603 	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
4604 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4605 
4606 	/* Decoder normal/abnormal interrupts */
4607 	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
4608 		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
4609 			continue;
4610 
4611 		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4612 		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
4613 		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
4614 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4615 
4616 		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4617 		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
4618 		interrupt_id += 1;
4619 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4620 	}
4621 }
4622 
4623 static void gaudi2_init_sm(struct hl_device *hdev)
4624 {
4625 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4626 	u64 cq_address;
4627 	u32 reg_val;
4628 	int i;
4629 
4630 	/* Enable HBW/LBW CQ for completion monitors */
4631 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4632 	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
4633 
4634 	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
4635 		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4636 
4637 	/* Enable only HBW CQ for KDMA completion monitor */
4638 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4639 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4640 
4641 	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
4642 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
4643 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
4644 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
4645 
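	/* Set the base address and size of each reserved completion queue */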
4646 	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
4647 		cq_address =
4648 			hdev->completion_queue[i].bus_address;
4649 
4650 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
4651 							lower_32_bits(cq_address));
4652 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
4653 							upper_32_bits(cq_address));
4654 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
4655 							ilog2(HL_CQ_SIZE_IN_BYTES));
4656 	}
4657 
	/* Configure kernel ASID and MMU BP */
4659 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
4660 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
4661 
4662 	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
4663 	gaudi2_prepare_sm_for_virt_msix_db(hdev);
4664 }
4665 
4666 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
4667 {
4668 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4669 	u32 reg_val;
4670 	int i;
4671 
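	/* Leave the WBC error response interrupt unmasked and mask the FP
	 * special-value (inf/NaN) interrupt causes
	 */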
4672 	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
4673 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
4674 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
4675 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
4676 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
4677 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
4678 
4679 	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
4680 	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
4681 
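	/* Program a random seed per LFSR: select the seed index, then write its value */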
4682 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
4683 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
4684 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
4685 	}
4686 }
4687 
4688 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
4689 							bool config_qman_only)
4690 {
4691 	u32 queue_id_base, reg_base;
4692 
4693 	switch (dcore_id) {
4694 	case 0:
4695 		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
4696 		break;
4697 	case 1:
4698 		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
4699 		break;
4700 	case 2:
4701 		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
4702 		break;
4703 	case 3:
4704 		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
4705 		break;
4706 	default:
4707 		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
4708 		return;
4709 	}
4710 
4711 	if (!config_qman_only) {
4712 		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
4713 		gaudi2_init_mme_acc(hdev, reg_base);
4714 	}
4715 
4716 	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
4717 	gaudi2_init_qman(hdev, reg_base, queue_id_base);
4718 }
4719 
4720 static void gaudi2_init_mme(struct hl_device *hdev)
4721 {
4722 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4723 	int i;
4724 
4725 	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
4726 		return;
4727 
4728 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
4729 		gaudi2_init_dcore_mme(hdev, i, false);
4730 
4731 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
4732 	}
4733 }
4734 
4735 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
4736 {
4737 	/* Mask arithmetic and QM interrupts in TPC */
4738 	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
4739 
4740 	/* Set 16 cache lines */
4741 	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
4742 			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
4743 }
4744 
4745 struct gaudi2_tpc_init_cfg_data {
4746 	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
4747 };
4748 
4749 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
4750 					u32 offset, struct iterate_module_ctx *ctx)
4751 {
4752 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4753 	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
4754 	u32 queue_id_base;
4755 	u8 seq;
4756 
4757 	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
4758 
4759 	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
		/* the last TPC of DCORE0 (the extra TPC) gets the last sequence number */
4761 		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
4762 	else
4763 		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
4764 
4765 	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
4766 	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
4767 
4768 	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
4769 }
4770 
4771 static void gaudi2_init_tpc(struct hl_device *hdev)
4772 {
4773 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4774 	struct gaudi2_tpc_init_cfg_data init_cfg_data;
4775 	struct iterate_module_ctx tpc_iter;
4776 
4777 	if (!hdev->asic_prop.tpc_enabled_mask)
4778 		return;
4779 
4780 	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
4781 		return;
4782 
4783 	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
4784 	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
4785 	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
4786 	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
4787 	tpc_iter.fn = &gaudi2_init_tpc_config;
4788 	tpc_iter.data = &init_cfg_data;
4789 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
4790 }
4791 
4792 static void gaudi2_init_rotator(struct hl_device *hdev)
4793 {
4794 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4795 	u32 i, reg_base, queue_id;
4796 
4797 	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
4798 
4799 	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4800 		reg_base = gaudi2_qm_blocks_bases[queue_id];
4801 		gaudi2_init_qman(hdev, reg_base, queue_id);
4802 
4803 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
4804 	}
4805 }
4806 
4807 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
4808 {
4809 	u32 sob_id;
4810 
4811 	/* VCMD normal interrupt */
4812 	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4813 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
4814 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4815 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4816 
4817 	/* VCMD abnormal interrupt */
4818 	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4819 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
4820 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4821 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4822 }
4823 
4824 static void gaudi2_init_dec(struct hl_device *hdev)
4825 {
4826 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4827 	u32 dcore_id, dec_id, dec_bit;
4828 	u64 base_addr;
4829 
4830 	if (!hdev->asic_prop.decoder_enabled_mask)
4831 		return;
4832 
4833 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
4834 		return;
4835 
4836 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4837 		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4838 			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4839 
4840 			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4841 				continue;
4842 
			base_addr = mmDCORE0_DEC0_CMD_BASE +
4844 					BRDG_CTRL_BLOCK_OFFSET +
4845 					dcore_id * DCORE_OFFSET +
4846 					dec_id * DCORE_VDEC_OFFSET;
4847 
4848 			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4849 
4850 			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4851 		}
4852 
4853 	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
4854 		dec_bit = PCIE_DEC_SHIFT + dec_id;
4855 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4856 			continue;
4857 
4858 		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
4859 				dec_id * DCORE_VDEC_OFFSET;
4860 
4861 		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4862 
4863 		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4864 	}
4865 }
4866 
4867 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
4868 					u32 stlb_base, u32 asid, u64 phys_addr)
4869 {
4870 	u32 status, timeout_usec;
4871 	int rc;
4872 
4873 	if (hdev->pldm || !hdev->pdev)
4874 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
4875 	else
4876 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4877 
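	/* Write the ASID and its hop0 page-table physical address, set the busy bit
	 * to trigger the update, and wait for the HW to clear it
	 */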
4878 	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
4879 	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
4880 	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
4881 	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
4882 
4883 	rc = hl_poll_timeout(
4884 		hdev,
4885 		stlb_base + STLB_BUSY_OFFSET,
4886 		status,
4887 		!(status & 0x80000000),
4888 		1000,
4889 		timeout_usec);
4890 
4891 	if (rc) {
4892 		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
4893 		return rc;
4894 	}
4895 
4896 	return 0;
4897 }
4898 
4899 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
4900 					u32 start_offset, u32 inv_start_val,
4901 					u32 flags)
4902 {
4903 	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
4904 	if (flags & MMU_OP_CLEAR_MEMCACHE)
4905 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
4906 
4907 	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
4908 		return;
4909 
4910 	WREG32(stlb_base + start_offset, inv_start_val);
4911 }
4912 
4913 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
4914 						struct gaudi2_cache_invld_params *inv_params)
4915 {
4916 	u32 status, timeout_usec, start_offset;
4917 	int rc;
4918 
4919 	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
4920 					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
4921 
4922 	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
4923 	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
4924 		rc = hl_poll_timeout(
4925 			hdev,
4926 			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
4927 			status,
4928 			status & 0x1,
4929 			1000,
4930 			timeout_usec);
4931 
4932 		if (rc)
4933 			return rc;
4934 
4935 		/* Need to manually reset the status to 0 */
4936 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
4937 	}
4938 
4939 	/* Lower cache does not work with cache lines, hence we can skip its
4940 	 * invalidation upon map and invalidate only upon unmap
4941 	 */
4942 	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
4943 		return 0;
4944 
4945 	start_offset = inv_params->range_invalidation ?
4946 			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
4947 
4948 	rc = hl_poll_timeout(
4949 		hdev,
4950 		stlb_base + start_offset,
4951 		status,
4952 		!(status & 0x1),
4953 		1000,
4954 		timeout_usec);
4955 
4956 	return rc;
4957 }
4958 
4959 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
4960 {
4961 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4962 	u32 hw_cap;
4963 
4964 	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
4965 
4966 	if (gaudi2->hw_cap_initialized & hw_cap)
4967 		return true;
4968 
4969 	return false;
4970 }
4971 
/* this function shall be called only for HMMUs for which the capability bit is set */
4973 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
4974 {
4975 	u32 offset;
4976 
4977 	offset =  (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
4978 	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
4979 }
4980 
4981 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
4982 						struct gaudi2_cache_invld_params *inv_params)
4983 {
4984 	u32 start_offset;
4985 
4986 	if (inv_params->range_invalidation) {
		/* Set the address range.
		 * Note: by design, the start address written to the register is not
		 * included in the invalidation range. That is why we write an address
		 * lower than the first address we actually want to be included in the
		 * range invalidation.
		 */
4993 		u64 start = inv_params->start_va - 1;
4994 
4995 		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
4996 
4997 		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
4998 				start >> MMU_RANGE_INV_VA_LSB_SHIFT);
4999 
5000 		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
5001 				start >> MMU_RANGE_INV_VA_MSB_SHIFT);
5002 
5003 		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
5004 				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
5005 
5006 		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
5007 				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
5008 	} else {
5009 		start_offset = STLB_INV_ALL_START_OFFSET;
5010 	}
5011 
5012 	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
5013 						inv_params->inv_start_val, inv_params->flags);
5014 }
5015 
5016 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
5017 						int dcore_id, int hmmu_id,
5018 						struct gaudi2_cache_invld_params *inv_params)
5019 {
5020 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5021 
5022 	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
5023 }
5024 
5025 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
5026 						int dcore_id, int hmmu_id,
5027 						struct gaudi2_cache_invld_params *inv_params)
5028 {
5029 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5030 
5031 	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
5032 }
5033 
5034 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
5035 						struct gaudi2_cache_invld_params *inv_params)
5036 {
5037 	int dcore_id, hmmu_id;
5038 
5039 	/* first send all invalidation commands */
5040 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5041 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5042 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5043 				continue;
5044 
5045 			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
5046 		}
5047 	}
5048 
	/* next, poll the status of all invalidations */
5050 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5051 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5052 			int rc;
5053 
5054 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5055 				continue;
5056 
5057 			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
5058 										inv_params);
5059 			if (rc)
5060 				return rc;
5061 		}
5062 	}
5063 
5064 	return 0;
5065 }
5066 
5067 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
5068 {
5069 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5070 	struct gaudi2_cache_invld_params invld_params;
5071 	int rc = 0;
5072 
5073 	if (hdev->reset_info.hard_reset_pending)
5074 		return rc;
5075 
5076 	invld_params.range_invalidation = false;
5077 	invld_params.inv_start_val = 1;
5078 
5079 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5080 		invld_params.flags = flags;
5081 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5082 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5083 										&invld_params);
5084 	} else if (flags & MMU_OP_PHYS_PACK) {
5085 		invld_params.flags = 0;
5086 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5087 	}
5088 
5089 	return rc;
5090 }
5091 
5092 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
5093 				u32 flags, u32 asid, u64 va, u64 size)
5094 {
5095 	struct gaudi2_cache_invld_params invld_params = {0};
5096 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5097 	u64 start_va, end_va;
5098 	u32 inv_start_val;
5099 	int rc = 0;
5100 
5101 	if (hdev->reset_info.hard_reset_pending)
5102 		return 0;
5103 
5104 	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
5105 			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
5106 			asid << MMU_RANGE_INV_ASID_SHIFT);
5107 	start_va = va;
5108 	end_va = start_va + size;
5109 
5110 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		/* As range invalidation does not support a zero address, we do a full
		 * invalidation in this case
		 */
5114 		if (start_va) {
5115 			invld_params.range_invalidation = true;
5116 			invld_params.start_va = start_va;
5117 			invld_params.end_va = end_va;
5118 			invld_params.inv_start_val = inv_start_val;
5119 			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5120 		} else {
5121 			invld_params.range_invalidation = false;
5122 			invld_params.inv_start_val = 1;
5123 			invld_params.flags = flags;
5124 		}
5125 
5127 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5128 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5129 										&invld_params);
5130 		if (rc)
5131 			return rc;
5132 
5133 	} else if (flags & MMU_OP_PHYS_PACK) {
5134 		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5135 		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5136 		invld_params.inv_start_val = inv_start_val;
5137 		invld_params.flags = flags;
5138 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5139 	}
5140 
5141 	return rc;
5142 }
5143 
5144 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
5145 {
5146 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5147 	u64 hop0_addr;
5148 	u32 asid, max_asid = prop->max_asid;
5149 	int rc;
5150 
5151 	/* it takes too much time to init all of the ASIDs on palladium */
5152 	if (hdev->pldm)
5153 		max_asid = min((u32) 8, max_asid);
5154 
5155 	for (asid = 0 ; asid < max_asid ; asid++) {
5156 		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5157 		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5158 		if (rc) {
5159 			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5160 			return rc;
5161 		}
5162 	}
5163 
5164 	return 0;
5165 }
5166 
5167 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5168 {
5169 	u32 status, timeout_usec;
5170 	int rc;
5171 
5172 	if (hdev->pldm || !hdev->pdev)
5173 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5174 	else
5175 		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5176 
5177 	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5178 
5179 	rc = hl_poll_timeout(
5180 		hdev,
5181 		stlb_base + STLB_SRAM_INIT_OFFSET,
5182 		status,
5183 		!status,
5184 		1000,
5185 		timeout_usec);
5186 
5187 	if (rc)
5188 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5189 
5190 	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5191 	if (rc)
5192 		return rc;
5193 
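	/* Disable MMU bypass so accesses are translated using the page tables */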
5194 	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5195 
5196 	rc = hl_poll_timeout(
5197 		hdev,
5198 		stlb_base + STLB_INV_ALL_START_OFFSET,
5199 		status,
5200 		!status,
5201 		1000,
5202 		timeout_usec);
5203 
5204 	if (rc)
5205 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5206 
5207 	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5208 
5209 	return rc;
5210 }
5211 
5212 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5213 {
5214 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5215 	u32 mmu_base, stlb_base;
5216 	int rc;
5217 
5218 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5219 		return 0;
5220 
5221 	mmu_base = mmPMMU_HBW_MMU_BASE;
5222 	stlb_base = mmPMMU_HBW_STLB_BASE;
5223 
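	/* Configure the PMMU page-walk hops: first hop 0, last hop 5, and a first
	 * lookup hop of 5 for small pages and 4 for large pages
	 */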
5224 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5225 		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5226 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5227 		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5228 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5229 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5230 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5231 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5232 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5233 		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5234 		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5235 
5236 	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5237 
5238 	if (PAGE_SIZE == SZ_64K) {
5239 		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5240 		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5241 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5242 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5243 			FIELD_PREP(
5244 				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5245 				1),
5246 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5247 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5248 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5249 	}
5250 
5251 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5252 
5253 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5254 	if (rc)
5255 		return rc;
5256 
5257 	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5258 
5259 	return 0;
5260 }
5261 
5262 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5263 				int hmmu_id)
5264 {
5265 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5266 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5267 	u32 offset, mmu_base, stlb_base, hw_cap;
5268 	u8 dmmu_seq;
5269 	int rc;
5270 
5271 	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5272 	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5273 
5274 	/*
5275 	 * return if DMMU is already initialized or if it's not out of
5276 	 * isolation (due to cluster binning)
5277 	 */
5278 	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5279 		return 0;
5280 
5281 	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5282 	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5283 	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5284 
5285 	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5286 			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5287 
5288 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5289 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5290 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5291 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5292 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5293 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5294 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5295 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5296 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5297 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5298 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5299 
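	/* The HMMUs serve the HBM, which uses only large pages */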
5300 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5301 			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5302 
5303 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5304 
5305 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5306 	if (rc)
5307 		return rc;
5308 
5309 	gaudi2->hw_cap_initialized |= hw_cap;
5310 
5311 	return 0;
5312 }
5313 
5314 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5315 {
5316 	int rc, dcore_id, hmmu_id;
5317 
5318 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5319 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5320 			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5321 			if (rc)
5322 				return rc;
5323 		}
5324 
5325 	return 0;
5326 }
5327 
5328 static int gaudi2_mmu_init(struct hl_device *hdev)
5329 {
5330 	int rc;
5331 
5332 	rc = gaudi2_pci_mmu_init(hdev);
5333 	if (rc)
5334 		return rc;
5335 
5336 	rc = gaudi2_hbm_mmu_init(hdev);
5337 	if (rc)
5338 		return rc;
5339 
5340 	return 0;
5341 }
5342 
5343 static int gaudi2_hw_init(struct hl_device *hdev)
5344 {
5345 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5346 	int rc;
5347 
5348 	/* Let's mark in the H/W that we have reached this point. We check
5349 	 * this value in the reset_before_init function to understand whether
5350 	 * we need to reset the chip before doing H/W init. This register is
5351 	 * cleared by the H/W upon H/W reset
5352 	 */
5353 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5354 
5355 	/* Perform read from the device to make sure device is up */
5356 	RREG32(mmHW_STATE);
5357 
5358 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5359 	 * So we set it here and if anyone tries to move it later to
5360 	 * a different address, there will be an error
5361 	 */
5362 	if (hdev->asic_prop.iatu_done_by_fw)
5363 		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5364 
5365 	/*
5366 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
5367 	 * base address of dram
5368 	 */
5369 	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5370 		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5371 		return -EIO;
5372 	}
5373 
5374 	rc = gaudi2_init_cpu(hdev);
5375 	if (rc) {
5376 		dev_err(hdev->dev, "failed to initialize CPU\n");
5377 		return rc;
5378 	}
5379 
5380 	gaudi2_init_scrambler_hbm(hdev);
5381 	gaudi2_init_kdma(hdev);
5382 
5383 	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5384 	if (rc) {
5385 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5386 		return rc;
5387 	}
5388 
5389 	rc = gaudi2->cpucp_info_get(hdev);
5390 	if (rc) {
5391 		dev_err(hdev->dev, "Failed to get cpucp info\n");
5392 		return rc;
5393 	}
5394 
5395 	rc = gaudi2_mmu_init(hdev);
5396 	if (rc)
5397 		return rc;
5398 
5399 	gaudi2_init_pdma(hdev);
5400 	gaudi2_init_edma(hdev);
5401 	gaudi2_init_sm(hdev);
5402 	gaudi2_init_tpc(hdev);
5403 	gaudi2_init_mme(hdev);
5404 	gaudi2_init_rotator(hdev);
5405 	gaudi2_init_dec(hdev);
5406 	gaudi2_enable_timestamp(hdev);
5407 
5408 	rc = gaudi2_coresight_init(hdev);
5409 	if (rc)
5410 		goto disable_queues;
5411 
5412 	rc = gaudi2_enable_msix(hdev);
5413 	if (rc)
5414 		goto disable_queues;
5415 
5416 	/* Perform read from the device to flush all configuration */
5417 	RREG32(mmHW_STATE);
5418 
5419 	return 0;
5420 
5421 disable_queues:
5422 	gaudi2_disable_dma_qmans(hdev);
5423 	gaudi2_disable_mme_qmans(hdev);
5424 	gaudi2_disable_tpc_qmans(hdev);
5425 	gaudi2_disable_rot_qmans(hdev);
5426 	gaudi2_disable_nic_qmans(hdev);
5427 
5428 	gaudi2_disable_timestamp(hdev);
5429 
5430 	return rc;
5431 }
5432 
5433 /**
5434  * gaudi2_send_hard_reset_cmd - common function to handle reset
5435  *
5436  * @hdev: pointer to the habanalabs device structure
5437  *
5438  * This function handles the various possible scenarios for reset.
 * It considers whether the reset is handled by the driver or by the FW, and which FW
 * components are loaded.
5440  */
5441 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
5442 {
5443 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5444 	bool heartbeat_reset, preboot_only, cpu_initialized = false;
5445 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5446 	u32 cpu_boot_status;
5447 
5448 	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
5449 	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
5450 
5451 	/*
5452 	 * Handle corner case where failure was at cpu management app load,
5453 	 * and driver didn't detect any failure while loading the FW,
5454 	 * then at such scenario driver will send only HALT_MACHINE
5455 	 * and no one will respond to this request since FW already back to preboot
5456 	 * and it cannot handle such cmd.
5457 	 * In this case next time the management app loads it'll check on events register
5458 	 * which will still have the halt indication, and will reboot the device.
5459 	 * The solution is to let preboot clear all relevant registers before next boot
5460 	 * once driver send COMMS_RST_DEV.
5461 	 */
5462 	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
5463 
5464 	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
5465 			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
5466 		cpu_initialized = true;
5467 
5468 	/*
	 * when Linux/Bootfit exists, this write to the SP can be interpreted in 2 ways:
5470 	 * 1. FW reset: FW initiate the reset sequence
5471 	 * 2. driver reset: FW will start HALT sequence (the preparations for the
5472 	 *                  reset but not the reset itself as it is not implemented
5473 	 *                  on their part) and LKD will wait to let FW complete the
5474 	 *                  sequence before issuing the reset
5475 	 */
5476 	if (!preboot_only && cpu_initialized) {
5477 		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
5478 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
5479 
5480 		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
5481 	}
5482 
5483 	/*
5484 	 * When working with preboot (without Linux/Boot fit) we can
5485 	 * communicate only using the COMMS commands to issue halt/reset.
5486 	 *
5487 	 * For the case in which we are working with Linux/Bootfit this is a hail-mary
5488 	 * attempt to revive the card in the small chance that the f/w has
5489 	 * experienced a watchdog event, which caused it to return back to preboot.
5490 	 * In that case, triggering reset through GIC won't help. We need to
5491 	 * trigger the reset as if Linux wasn't loaded.
5492 	 *
5493 	 * We do it only if the reset cause was HB, because that would be the
5494 	 * indication of such an event.
5495 	 *
5496 	 * In case watchdog hasn't expired but we still got HB, then this won't
5497 	 * do any damage.
5498 	 */
5499 
5500 	if (heartbeat_reset || preboot_only || !cpu_initialized) {
5501 		if (hdev->asic_prop.hard_reset_done_by_fw)
5502 			hl_fw_ask_hard_reset_without_linux(hdev);
5503 		else
5504 			hl_fw_ask_halt_machine_without_linux(hdev);
5505 	}
5506 }
5507 
5508 /**
5509  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
5510  *
5511  * @hdev: pointer to the habanalabs device structure
5512  * @reset_sleep_ms: sleep time in msec after reset
5513  *
 * This function executes a hard reset, based on whether the driver or the FW should
 * perform it.
5515  */
5516 static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms)
5517 {
5518 	if (hdev->asic_prop.hard_reset_done_by_fw) {
5519 		gaudi2_send_hard_reset_cmd(hdev);
5520 		return;
5521 	}
5522 
	/* Set device to handle FLR by H/W as we will put the device
	 * CPU into halt mode
	 */
5526 	WREG32(mmPCIE_AUX_FLR_CTRL,
5527 			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
5528 
5529 	gaudi2_send_hard_reset_cmd(hdev);
5530 
5531 	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
5532 }
5533 
5534 /**
5535  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
5536  *
5537  * @hdev: pointer to the habanalabs device structure
5538  * @reset_sleep_ms: sleep time in msec after reset
5539  * @driver_performs_reset: true if driver should perform reset instead of f/w.
5540  *
 * This function executes a soft reset, based on whether the driver or the FW should
 * perform it.
5542  */
5543 static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms,
5544 						bool driver_performs_reset)
5545 {
5546 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5547 
5548 	if (!driver_performs_reset) {
5549 		/* set SP to indicate reset request sent to FW */
5550 		if (dyn_regs->cpu_rst_status)
5551 			WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
5552 		else
5553 			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
5554 
5555 		WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
5556 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
5557 		return;
5558 	}
5559 
	/* Block access to engines, QMANs and SM during reset; these
	 * RRs will be reconfigured after soft reset.
	 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
	 */
5563 	 */
5564 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
5565 					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
5566 
5567 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
5568 				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
5569 				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
5570 
5571 	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
5572 }
5573 
5574 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
5575 								u32 poll_timeout_us)
5576 {
5577 	int i, rc = 0;
5578 	u32 reg_val;
5579 
	/* the reset will not work without this sleep */
5581 	msleep(reset_sleep_ms);
5582 
	/* We poll the BTM done indication multiple times after reset due to
	 * HW erratum 'GAUDI2_0300'
	 */
5586 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5587 		rc = hl_poll_timeout(
5588 			hdev,
5589 			mmPSOC_GLOBAL_CONF_BTM_FSM,
5590 			reg_val,
5591 			reg_val == 0,
5592 			1000,
5593 			poll_timeout_us);
5594 
5595 	if (rc)
5596 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
5597 }
5598 
5599 static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
5600 {
5601 	int i, rc = 0;
5602 	u32 reg_val;
5603 
5604 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5605 		rc = hl_poll_timeout(
5606 			hdev,
5607 			mmCPU_RST_STATUS_TO_HOST,
5608 			reg_val,
5609 			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
5610 			1000,
5611 			poll_timeout_us);
5612 
5613 	if (rc)
5614 		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
5615 				reg_val);
5616 }
5617 
5618 static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
5619 {
5620 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5621 	u32 poll_timeout_us, reset_sleep_ms;
5622 	bool driver_performs_reset = false;
5623 
5624 	if (hdev->pldm) {
5625 		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
5626 						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
5627 		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
5628 	} else {
5629 		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
5630 		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
5631 	}
5632 
5633 	if (fw_reset)
5634 		goto skip_reset;
5635 
5636 	gaudi2_reset_arcs(hdev);
5637 
5638 	if (hard_reset) {
5639 		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
5640 		gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
5641 	} else {
5642 		/*
5643 		 * As we have to support also work with preboot only (which does not supports
5644 		 * soft reset) we have to make sure that security is disabled before letting driver
5645 		 * do the reset. user shall control the BFE flags to avoid asking soft reset in
5646 		 * secured device with preboot only.
5647 		 */
5648 		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
5649 							!hdev->asic_prop.fw_security_enabled);
5650 		gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
5651 	}
5652 
5653 skip_reset:
5654 	if (driver_performs_reset || hard_reset)
5655 		/*
5656 		 * Instead of waiting for BTM indication we should wait for preboot ready:
5657 		 * Consider the below scenario:
5658 		 * 1. FW update is being triggered
5659 		 *        - setting the dirty bit
5660 		 * 2. hard reset will be triggered due to the dirty bit
5661 		 * 3. FW initiates the reset:
5662 		 *        - dirty bit cleared
5663 		 *        - BTM indication cleared
5664 		 *        - preboot ready indication cleared
5665 		 * 4. during hard reset:
5666 		 *        - BTM indication will be set
5667 		 *        - BIST test performed and another reset triggered
5668 		 * 5. only after this reset the preboot will set the preboot ready
5669 		 *
		 * When polling on the BTM indication alone, we can lose sync with the FW
		 * while trying to communicate with a FW that is in the middle of a reset.
		 * To overcome this, we always wait for the preboot ready indication
5673 		 */
5674 		if ((hdev->fw_components & FW_TYPE_PREBOOT_CPU)) {
5675 			msleep(reset_sleep_ms);
5676 			hl_fw_wait_preboot_ready(hdev);
5677 		} else {
5678 			gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us);
5679 		}
5680 	else
5681 		gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
5682 
5683 	if (!gaudi2)
5684 		return;
5685 
5686 	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
5687 	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
5688 
5689 	/*
5690 	 * Clear NIC capability mask in order for driver to re-configure
5691 	 * NIC QMANs. NIC ports will not be re-configured during soft
5692 	 * reset as we call gaudi2_nic_init only during hard reset
5693 	 */
5694 	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
5695 
5696 	if (hard_reset) {
5697 		gaudi2->hw_cap_initialized &=
5698 			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
5699 			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
5700 			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
5701 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
5702 			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
5703 
5704 		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
5705 	} else {
5706 		gaudi2->hw_cap_initialized &=
5707 			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
5708 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
5709 			HW_CAP_ROT_MASK);
5710 	}
5711 }
5712 
5713 static int gaudi2_suspend(struct hl_device *hdev)
5714 {
5715 	int rc;
5716 
5717 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
5718 	if (rc)
5719 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
5720 
5721 	return rc;
5722 }
5723 
5724 static int gaudi2_resume(struct hl_device *hdev)
5725 {
5726 	return gaudi2_init_iatu(hdev);
5727 }
5728 
5729 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
5730 		void *cpu_addr, dma_addr_t dma_addr, size_t size)
5731 {
5732 	int rc;
5733 
5734 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
5735 			VM_DONTCOPY | VM_NORESERVE);
5736 
5737 #ifdef _HAS_DMA_MMAP_COHERENT
5738 
5739 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
5740 	if (rc)
5741 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
5742 
5743 #else
5744 
5745 	rc = remap_pfn_range(vma, vma->vm_start,
5746 				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
5747 				size, vma->vm_page_prot);
5748 	if (rc)
5749 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
5750 
5751 #endif
5752 
5753 	return rc;
5754 }
5755 
5756 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
5757 {
5758 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5759 	u64 hw_cap_mask = 0;
5760 	u64 hw_tpc_cap_bit = 0;
5761 	u64 hw_nic_cap_bit = 0;
5762 	u64 hw_test_cap_bit = 0;
5763 
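	/* Each QMAN exposes 4 PQs, so dividing the queue ID offset by 4 yields the
	 * engine index used for the capability bit
	 */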
5764 	switch (hw_queue_id) {
5765 	case GAUDI2_QUEUE_ID_PDMA_0_0:
5766 	case GAUDI2_QUEUE_ID_PDMA_0_1:
5767 	case GAUDI2_QUEUE_ID_PDMA_1_0:
5768 		hw_cap_mask = HW_CAP_PDMA_MASK;
5769 		break;
5770 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
5771 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
5772 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
5773 		break;
5774 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
5775 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
5776 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
5777 		break;
5778 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
5779 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
5780 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
5781 		break;
5782 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
5783 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
5784 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
5785 		break;
5786 
5787 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
5788 		hw_test_cap_bit = HW_CAP_MME_SHIFT;
5789 		break;
5790 
5791 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
5792 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
5793 		break;
5794 
5795 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
5796 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
5797 		break;
5798 
5799 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
5800 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
5801 		break;
5802 
5803 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
5804 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
5805 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
5806 
5807 		/* special case where cap bit refers to the first queue id */
5808 		if (!hw_tpc_cap_bit)
5809 			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
5810 		break;
5811 
5812 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
5813 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
5814 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
5815 		break;
5816 
5817 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
5818 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
5819 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
5820 		break;
5821 
5822 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
5823 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
5824 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
5825 		break;
5826 
5827 	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
5828 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
5829 		break;
5830 
5831 	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
5832 		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
5833 		break;
5834 
5835 	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
5836 		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
5837 
5838 		/* special case where cap bit refers to the first queue id */
5839 		if (!hw_nic_cap_bit)
5840 			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
5841 		break;
5842 
5843 	case GAUDI2_QUEUE_ID_CPU_PQ:
5844 		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
5845 
5846 	default:
5847 		return false;
5848 	}
5849 
	if (hw_tpc_cap_bit)
		return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));

	if (hw_nic_cap_bit)
		return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
5855 
5856 	if (hw_test_cap_bit)
5857 		hw_cap_mask = BIT_ULL(hw_test_cap_bit);
5858 
5859 	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
5860 }
5861 
5862 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
5863 {
5864 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5865 
5866 	switch (arc_id) {
5867 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5868 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5869 		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
5870 
5871 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5872 		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5873 
5874 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5875 		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5876 
5877 	default:
5878 		return false;
5879 	}
5880 }
5881 
5882 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5883 {
5884 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5885 
5886 	switch (arc_id) {
5887 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5888 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5889 		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
5890 		break;
5891 
5892 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5893 		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5894 		break;
5895 
5896 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5897 		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5898 		break;
5899 
5900 	default:
5901 		return;
5902 	}
5903 }
5904 
5905 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5906 {
5907 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5908 
5909 	switch (arc_id) {
5910 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5911 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5912 		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
5913 		break;
5914 
5915 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5916 		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
5917 		break;
5918 
5919 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5920 		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
5921 		break;
5922 
5923 	default:
5924 		return;
5925 	}
5926 }
5927 
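/*
 * Ring a H/W queue doorbell by writing the new PI value to the matching
 * QMAN PQ_PI register (or to the CPU-IF PQ PI register for the CPU queue).
 * For the CPU queue, also notify the device CPU through the GIC.
 */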
5928 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
5929 {
5930 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5931 	u32 pq_offset, reg_base, db_reg_offset, db_value;
5932 
5933 	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
5934 		/*
5935 		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
5936 		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
5937 		 * number.
5938 		 */
5939 		pq_offset = (hw_queue_id & 0x3) * 4;
5940 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
5941 		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
5942 	} else {
5943 		db_reg_offset = mmCPU_IF_PF_PQ_PI;
5944 	}
5945 
5946 	db_value = pi;
5947 
5948 	/* ring the doorbell */
5949 	WREG32(db_reg_offset, db_value);
5950 
5951 	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
5952 		/* make sure device CPU will read latest data from host */
5953 		mb();
5954 		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
5955 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
5956 	}
5957 }
5958 
5959 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
5960 {
5961 	__le64 *pbd = (__le64 *) bd;
5962 
	/* The QMAN PQs reside in host memory, so a simple copy suffices */
5964 	pqe[0] = pbd[0];
5965 	pqe[1] = pbd[1];
5966 }
5967 
5968 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
5969 				dma_addr_t *dma_handle, gfp_t flags)
5970 {
5971 	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
5972 }
5973 
5974 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
5975 				void *cpu_addr, dma_addr_t dma_handle)
5976 {
5977 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
5978 }
5979 
5980 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
5981 				u32 timeout, u64 *result)
5982 {
5983 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5984 
5985 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
5986 		if (result)
5987 			*result = 0;
5988 		return 0;
5989 	}
5990 
5991 	if (!timeout)
5992 		timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
5993 
5994 	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
5995 }
5996 
5997 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5998 				gfp_t mem_flags, dma_addr_t *dma_handle)
5999 {
6000 	if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6001 		return NULL;
6002 
6003 	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6004 }
6005 
6006 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6007 {
6008 	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6009 }
6010 
6011 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6012 						dma_addr_t *dma_handle)
6013 {
6014 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6015 }
6016 
6017 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6018 {
6019 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6020 }
6021 
6022 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
6023 					enum dma_data_direction dir)
6024 {
6025 	dma_addr_t dma_addr;
6026 
6027 	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
6028 	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
6029 		return 0;
6030 
6031 	return dma_addr;
6032 }
6033 
6034 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
6035 					enum dma_data_direction dir)
6036 {
6037 	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
6038 }
6039 
6040 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6041 {
6042 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6043 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6044 
6045 	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6046 		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6047 		return -EINVAL;
6048 	}
6049 
6050 	/* Just check if CB address is valid */
6051 
6052 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6053 					parser->user_cb_size,
6054 					asic_prop->sram_user_base_address,
6055 					asic_prop->sram_end_address))
6056 		return 0;
6057 
6058 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6059 					parser->user_cb_size,
6060 					asic_prop->dram_user_base_address,
6061 					asic_prop->dram_end_address))
6062 		return 0;
6063 
6064 	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6065 		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6066 						parser->user_cb_size,
6067 						asic_prop->dmmu.start_addr,
6068 						asic_prop->dmmu.end_addr))
6069 		return 0;
6070 
6071 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6072 		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6073 					parser->user_cb_size,
6074 					asic_prop->pmmu.start_addr,
6075 					asic_prop->pmmu.end_addr) ||
6076 			hl_mem_area_inside_range(
6077 					(u64) (uintptr_t) parser->user_cb,
6078 					parser->user_cb_size,
6079 					asic_prop->pmmu_huge.start_addr,
6080 					asic_prop->pmmu_huge.end_addr))
6081 			return 0;
6082 
6083 	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6084 		if (!hdev->pdev)
6085 			return 0;
6086 
6087 		if (!device_iommu_mapped(&hdev->pdev->dev))
6088 			return 0;
6089 	}
6090 
6091 	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6092 		parser->user_cb, parser->user_cb_size);
6093 
6094 	return -EFAULT;
6095 }
6096 
6097 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6098 {
6099 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6100 
6101 	if (!parser->is_kernel_allocated_cb)
6102 		return gaudi2_validate_cb_address(hdev, parser);
6103 
6104 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6105 		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6106 		return -EINVAL;
6107 	}
6108 
6109 	return 0;
6110 }
6111 
6112 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6113 {
6114 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6115 
6116 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6117 		return 0;
6118 
6119 	return hl_fw_send_heartbeat(hdev);
6120 }
6121 
/* Internal helper function used to update the KDMA MMU properties
 * (MMU bypass and ASID). Should be called while holding the relevant KDMA lock.
 */
6125 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6126 					   bool mmu_bypass, u32 asid)
6127 {
6128 	u32 rw_asid, rw_mmu_bp;
6129 
6130 	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6131 		      (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6132 
6133 	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6134 			(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6135 
6136 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6137 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6138 }
6139 
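/*
 * Arm a Dcore0 sync manager monitor: reset the given SOB, program the
 * monitor to write mon_payload to completion queue cq_id, and arm it to
 * trigger once the SOB value equals sync_value.
 */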
6140 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6141 						u32 mon_payload, u32 sync_value)
6142 {
6143 	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6144 	u8 mask;
6145 
6146 	sob_offset = sob_id * 4;
6147 	mon_offset = mon_id * 4;
6148 
6149 	/* Reset the SOB value */
6150 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6151 
	/* Configure the payload address with the CQ ID because CQ_EN is set */
6153 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6154 
	/* Configure the payload data (e.g. the CS index) because CQ_EN is set */
6156 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6157 
6158 	sync_group_id = sob_id / 8;
6159 	mask = ~(1 << (sob_id & 0x7));
6160 	mode = 1; /* comparison mode is "equal to" */
6161 
6162 	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6163 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6164 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6165 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6166 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6167 }
6168 
/* Submit a linear memcpy/memset job to the KDMA engine and busy-wait for its completion */
6170 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6171 					u64 src_addr, u64 dst_addr,
6172 					u32 size, bool is_memset)
6173 {
6174 	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6175 	struct hl_cq_entry *cq_base;
6176 	struct hl_cq *cq;
6177 	u64 comp_addr;
6178 	int rc;
6179 
6180 	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6181 				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6182 				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6183 
6184 	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6185 			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6186 
6187 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6188 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6189 
6190 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6191 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6192 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6193 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6194 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6195 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6196 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6197 	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6198 
6199 	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6200 				FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6201 
6202 	if (is_memset)
6203 		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6204 
6205 	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6206 
6207 	/* Wait for completion */
6208 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6209 	cq_base = cq->kernel_address;
6210 	polling_addr = (u32 *)&cq_base[cq->ci];
6211 
6212 	if (hdev->pldm)
		/* 20 seconds of timeout for each 1MB of transfer */
6214 		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6215 	else
6216 		timeout = KDMA_TIMEOUT_USEC;
6217 
6218 	/* Polling */
6219 	rc = hl_poll_timeout_memory(
6220 			hdev,
6221 			polling_addr,
6222 			status,
6223 			(status == 1),
6224 			1000,
6225 			timeout,
6226 			true);
6227 
6228 	*polling_addr = 0;
6229 
6230 	if (rc) {
6231 		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6232 		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6233 		return rc;
6234 	}
6235 
6236 	cq->ci = hl_cq_inc_ptr(cq->ci);
6237 
6238 	return 0;
6239 }
6240 
6241 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6242 {
6243 	u32 i;
6244 
6245 	for (i = 0 ; i < size ; i += sizeof(u32))
6246 		WREG32(addr + i, val);
6247 }
6248 
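/*
 * Toggle QMAN test mode for a H/W queue: when enabled, the QMAN protection
 * is switched to trusted test mode and the PQC is disabled; when disabled,
 * the regular trusted protection and the PQC are restored.
 */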
6249 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6250 {
6251 	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6252 
6253 	if (enable) {
6254 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6255 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6256 	} else {
6257 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6258 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6259 	}
6260 }
6261 
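/*
 * Test a H/W queue by sending a MSG_SHORT packet that writes a known value
 * to a reserved sync object, then polling the SOB until that value shows up.
 */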
6262 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
6263 {
6264 	u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
6265 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6266 	u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a;
6267 	struct packet_msg_short *msg_short_pkt;
6268 	dma_addr_t pkt_dma_addr;
6269 	size_t pkt_size;
6270 	int rc;
6271 
6272 	if (hdev->pldm)
6273 		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6274 	else
6275 		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6276 
6277 	pkt_size = sizeof(*msg_short_pkt);
6278 	msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr);
6279 	if (!msg_short_pkt) {
6280 		dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n",
6281 			hw_queue_id);
6282 		return -ENOMEM;
6283 	}
6284 
6285 	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6286 		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6287 		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6288 		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6289 		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6290 
6291 	msg_short_pkt->value = cpu_to_le32(sob_val);
6292 	msg_short_pkt->ctl = cpu_to_le32(tmp);
6293 
6294 	/* Reset the SOB value */
6295 	WREG32(sob_addr, 0);
6296 
6297 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
6298 	if (rc) {
6299 		dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n",
6300 			hw_queue_id);
6301 		goto free_pkt;
6302 	}
6303 
6304 	rc = hl_poll_timeout(
6305 			hdev,
6306 			sob_addr,
6307 			tmp,
6308 			(tmp == sob_val),
6309 			1000,
6310 			timeout_usec);
6311 
6312 	if (rc == -ETIMEDOUT) {
6313 		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6314 			hw_queue_id, tmp);
6315 		rc = -EIO;
6316 	}
6317 
6318 	/* Reset the SOB value */
6319 	WREG32(sob_addr, 0);
6320 
6321 free_pkt:
6322 	hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr);
6323 	return rc;
6324 }
6325 
6326 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6327 {
6328 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6329 
6330 	/*
6331 	 * check capability here as send_cpu_message() won't update the result
6332 	 * value if no capability
6333 	 */
6334 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6335 		return 0;
6336 
6337 	return hl_fw_test_cpu_queue(hdev);
6338 }
6339 
6340 static int gaudi2_test_queues(struct hl_device *hdev)
6341 {
6342 	int i, rc, ret_val = 0;
6343 
6344 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6345 		if (!gaudi2_is_queue_enabled(hdev, i))
6346 			continue;
6347 
6348 		gaudi2_qman_set_test_mode(hdev, i, true);
6349 		rc = gaudi2_test_queue(hdev, i);
6350 		gaudi2_qman_set_test_mode(hdev, i, false);
6351 
6352 		if (rc) {
6353 			ret_val = -EINVAL;
6354 			goto done;
6355 		}
6356 	}
6357 
	rc = gaudi2_test_cpu_queue(hdev);
	if (rc)
		ret_val = -EINVAL;

done:
	return ret_val;
6366 }
6367 
6368 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6369 {
6370 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6371 	size_t irq_arr_size;
6372 
	/* TODO: missing gaudi2_nic_resume.
	 * Until it is implemented, nic_hw_cap_initialized will remain zeroed
	 */
6376 	gaudi2_init_arcs(hdev);
6377 	gaudi2_scrub_arcs_dccm(hdev);
6378 	gaudi2_init_security(hdev);
6379 
6380 	/* Unmask all IRQs since some could have been received during the soft reset */
6381 	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
6382 	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
6383 }
6384 
6385 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
6386 					struct iterate_module_ctx *ctx)
6387 {
6388 	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
6389 	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
6390 	bool is_eng_idle;
6391 	int engine_idx;
6392 
6393 	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
6394 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
6395 	else
6396 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
6397 				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
6398 
6399 	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
6400 	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
6401 	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
6402 	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
6403 
6404 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6405 						IS_TPC_IDLE(tpc_cfg_sts);
6406 	*(idle_data->is_idle) &= is_eng_idle;
6407 
6408 	if (idle_data->mask && !is_eng_idle)
6409 		set_bit(engine_idx, idle_data->mask);
6410 
6411 	if (idle_data->e)
6412 		hl_engine_data_sprintf(idle_data->e,
6413 					idle_data->tpc_fmt, dcore, inst,
6414 					is_eng_idle ? "Y" : "N",
6415 					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6416 }
6417 
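/*
 * Check the idle state of all engines (EDMA, PDMA, NIC, MME, TPC, decoders
 * and rotators). Busy engines are marked in the optional mask and, when an
 * engines_data buffer is provided, a human-readable status table is built.
 */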
6418 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
6419 					struct engines_data *e)
6420 {
6421 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask,
6422 		mme_arch_sts, dec_swreg15, dec_enabled_bit;
6423 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6424 	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n";
6425 	unsigned long *mask = (unsigned long *) mask_arr;
6426 	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n";
6427 	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
6428 	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
6429 	const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n";
6430 	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
6431 	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
6432 	bool is_idle = true, is_eng_idle;
6433 	u64 offset;
6434 
6435 	struct gaudi2_tpc_idle_data tpc_idle_data = {
6436 		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
6437 		.e = e,
6438 		.mask = mask,
6439 		.is_idle = &is_idle,
6440 	};
6441 	struct iterate_module_ctx tpc_iter = {
6442 		.fn = &gaudi2_is_tpc_engine_idle,
6443 		.data = &tpc_idle_data,
6444 	};
6445 
6446 	int engine_idx, i, j;
6447 
6448 	/* EDMA, Two engines per Dcore */
6449 	if (e)
6450 		hl_engine_data_sprintf(e,
6451 			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6452 			"----  ----  -------  ------------  ----------------------\n");
6453 
6454 	for (i = 0; i < NUM_OF_DCORES; i++) {
6455 		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
6456 			int seq = i * NUM_OF_EDMA_PER_DCORE + j;
6457 
6458 			if (!(prop->edma_enabled_mask & BIT(seq)))
6459 				continue;
6460 
6461 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
6462 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6463 			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
6464 
6465 			dma_core_idle_ind_mask =
6466 			RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset);
6467 
6468 			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
6469 			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
6470 			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
6471 
6472 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6473 					IS_DMA_IDLE(dma_core_idle_ind_mask);
6474 			is_idle &= is_eng_idle;
6475 
6476 			if (mask && !is_eng_idle)
6477 				set_bit(engine_idx, mask);
6478 
6479 			if (e)
6480 				hl_engine_data_sprintf(e, edma_fmt, i, j,
6481 							is_eng_idle ? "Y" : "N",
6482 							qm_glbl_sts0,
6483 							dma_core_idle_ind_mask);
6484 		}
6485 	}
6486 
6487 	/* PDMA, Two engines in Full chip */
6488 	if (e)
6489 		hl_engine_data_sprintf(e,
6490 					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6491 					"----  -------  ------------  ----------------------\n");
6492 
6493 	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
6494 		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
6495 		offset = i * PDMA_OFFSET;
6496 		dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset);
6497 
6498 		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
6499 		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
6500 		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
6501 
6502 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6503 				IS_DMA_IDLE(dma_core_idle_ind_mask);
6504 		is_idle &= is_eng_idle;
6505 
6506 		if (mask && !is_eng_idle)
6507 			set_bit(engine_idx, mask);
6508 
6509 		if (e)
6510 			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
6511 						qm_glbl_sts0, dma_core_idle_ind_mask);
6512 	}
6513 
6514 	/* NIC, twelve macros in Full chip */
6515 	if (e && hdev->nic_ports_mask)
6516 		hl_engine_data_sprintf(e,
6517 					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
6518 					"---  -------  ------------  ----------\n");
6519 
6520 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6521 		if (!(i & 1))
6522 			offset = i / 2 * NIC_OFFSET;
6523 		else
6524 			offset += NIC_QM_OFFSET;
6525 
6526 		if (!(hdev->nic_ports_mask & BIT(i)))
6527 			continue;
6528 
		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;

		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
6533 		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
6534 		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
6535 
6536 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6537 		is_idle &= is_eng_idle;
6538 
6539 		if (mask && !is_eng_idle)
6540 			set_bit(engine_idx, mask);
6541 
6542 		if (e)
6543 			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
6544 						qm_glbl_sts0, qm_cgm_sts);
6545 	}
6546 
6547 	if (e)
6548 		hl_engine_data_sprintf(e,
6549 					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
6550 					"---  ----  -------  ------------  ---------------\n");
6551 	/* MME, one per Dcore */
6552 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6553 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
6554 		offset = i * DCORE_OFFSET;
6555 
6556 		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
6557 		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
6558 		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
6559 
6560 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6561 		is_idle &= is_eng_idle;
6562 
6563 		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
6564 		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
6565 		is_idle &= is_eng_idle;
6566 
6567 		if (e)
6568 			hl_engine_data_sprintf(e, mme_fmt, i, "N",
6569 				is_eng_idle ? "Y" : "N",
6570 				qm_glbl_sts0,
6571 				mme_arch_sts);
6572 
6573 		if (mask && !is_eng_idle)
6574 			set_bit(engine_idx, mask);
6575 	}
6576 
6577 	/*
6578 	 * TPC
6579 	 */
6580 	if (e && prop->tpc_enabled_mask)
6581 		hl_engine_data_sprintf(e,
6582 			"\nCORE  TPC   is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_IDLE_IND_MASK\n"
6583 			"----  ---  --------  ------------  ----------  ----------------------\n");
6584 
6585 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
6586 
	/* Decoders, two per Dcore and two shared PCIe decoders */
6588 	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
6589 		hl_engine_data_sprintf(e,
6590 			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
6591 			"----  ---  -------  ---------------\n");
6592 
6593 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6594 		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
6595 			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
6596 			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
6597 				continue;
6598 
6599 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
6600 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6601 			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
6602 
6603 			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
6604 			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6605 			is_idle &= is_eng_idle;
6606 
6607 			if (mask && !is_eng_idle)
6608 				set_bit(engine_idx, mask);
6609 
6610 			if (e)
6611 				hl_engine_data_sprintf(e, dec_fmt, i, j,
6612 							is_eng_idle ? "Y" : "N", dec_swreg15);
6613 		}
6614 	}
6615 
6616 	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
6617 		hl_engine_data_sprintf(e,
6618 			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
6619 			"--------  -------  ---------------\n");
6620 
6621 	/* Check shared(PCIe) decoders */
6622 	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
6623 		dec_enabled_bit = PCIE_DEC_SHIFT + i;
6624 		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
6625 			continue;
6626 
6627 		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
6628 		offset = i * DCORE_DEC_OFFSET;
6629 		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
6630 		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6631 		is_idle &= is_eng_idle;
6632 
6633 		if (mask && !is_eng_idle)
6634 			set_bit(engine_idx, mask);
6635 
6636 		if (e)
6637 			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
6638 						is_eng_idle ? "Y" : "N", dec_swreg15);
6639 	}
6640 
6641 	if (e)
6642 		hl_engine_data_sprintf(e,
6643 			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6644 			"----  ----  -------  ------------  ----------  -------------\n");
6645 
6646 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
6647 		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
6648 
6649 		offset = i * ROT_OFFSET;
6650 
6651 		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
6652 		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
6653 		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
6654 
6655 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6656 		is_idle &= is_eng_idle;
6657 
6658 		if (mask && !is_eng_idle)
6659 			set_bit(engine_idx, mask);
6660 
6661 		if (e)
6662 			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
6663 					qm_glbl_sts0, qm_cgm_sts, "-");
6664 	}
6665 
6666 	return is_idle;
6667 }
6668 
6669 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
6670 	__acquires(&gaudi2->hw_queues_lock)
6671 {
6672 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6673 
6674 	spin_lock(&gaudi2->hw_queues_lock);
6675 }
6676 
6677 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
6678 	__releases(&gaudi2->hw_queues_lock)
6679 {
6680 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6681 
6682 	spin_unlock(&gaudi2->hw_queues_lock);
6683 }
6684 
6685 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
6686 {
6687 	return hdev->pdev->device;
6688 }
6689 
6690 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
6691 {
6692 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6693 
6694 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6695 		return 0;
6696 
6697 	return hl_fw_get_eeprom_data(hdev, data, max_size);
6698 }
6699 
6700 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
6701 {
6702 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
6703 }
6704 
6705 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
6706 {
6707 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6708 
6709 	if (aggregate) {
6710 		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
6711 		return gaudi2->events_stat_aggregate;
6712 	}
6713 
6714 	*size = (u32) sizeof(gaudi2->events_stat);
6715 	return gaudi2->events_stat;
6716 }
6717 
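/*
 * Program the MMU bypass and ASID AXUSER registers of a single Dcore VDEC
 * bridge, covering the decoder traffic and all of its MSI-X initiators.
 */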
6718 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
6719 				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6720 {
6721 	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
6722 			dcore_vdec_id + DCORE_OFFSET * dcore_id;
6723 
6724 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6725 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6726 
6727 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6728 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6729 
6730 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6731 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6732 
6733 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6734 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6735 
6736 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6737 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6738 }
6739 
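/*
 * Program the MMU bypass and ASID AXUSER registers of all engines belonging
 * to a single Dcore (EDMA, Sync Manager, MME and decoders) for a given ASID.
 */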
6740 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
6741 {
6742 	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6743 			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6744 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6745 	u32 dcore_offset = dcore_id * DCORE_OFFSET;
6746 	u32 vdec_id, i, ports_offset, reg_val;
6747 	u8 edma_seq_base;
6748 
6749 	/* EDMA */
6750 	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
6751 	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
6752 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6753 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6754 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6755 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6756 	}
6757 
6758 	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
6759 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6760 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6761 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6762 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6763 	}
6764 
6765 	/* Sync Mngr */
6766 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
6767 	/*
6768 	 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
6769 	 * for any access type
6770 	 */
6771 	if (dcore_id > 0) {
6772 		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
6773 			  (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
6774 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
6775 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
6776 	}
6777 
6778 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
6779 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
6780 
6781 	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
6782 		ports_offset = i * DCORE_MME_SBTE_OFFSET;
6783 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
6784 				dcore_offset + ports_offset, 0);
6785 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
6786 				dcore_offset + ports_offset, rw_asid);
6787 	}
6788 
6789 	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
6790 		ports_offset = i * DCORE_MME_WB_OFFSET;
6791 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
6792 				dcore_offset + ports_offset, 0);
6793 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
6794 				dcore_offset + ports_offset, rw_asid);
6795 	}
6796 
6797 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6798 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6799 
6800 	/*
6801 	 * Decoders
6802 	 */
6803 	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
6804 		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
6805 			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
6806 	}
6807 }
6808 
6809 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
6810 				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6811 {
6812 	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
6813 
6814 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6815 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6816 
6817 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6818 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6819 
6820 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6821 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6822 
6823 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6824 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6825 
6826 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6827 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6828 }
6829 
6830 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
6831 							u32 rw_asid, u32 rw_mmu_bp)
6832 {
6833 	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
6834 
6835 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
6836 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
6837 }
6838 
6839 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
6840 {
6841 	u32 reg_base, reg_offset, reg_val = 0;
6842 
6843 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
6844 
6845 	/* Enable MMU and configure asid for all relevant ARC regions */
6846 	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
6847 	reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
6848 
6849 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
6850 	WREG32(reg_base + reg_offset, reg_val);
6851 
6852 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
6853 	WREG32(reg_base + reg_offset, reg_val);
6854 
6855 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
6856 	WREG32(reg_base + reg_offset, reg_val);
6857 
6858 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
6859 	WREG32(reg_base + reg_offset, reg_val);
6860 
6861 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
6862 	WREG32(reg_base + reg_offset, reg_val);
6863 
6864 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
6865 	WREG32(reg_base + reg_offset, reg_val);
6866 
6867 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
6868 	WREG32(reg_base + reg_offset, reg_val);
6869 
6870 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
6871 	WREG32(reg_base + reg_offset, reg_val);
6872 
6873 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
6874 	WREG32(reg_base + reg_offset, reg_val);
6875 
6876 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
6877 	WREG32(reg_base + reg_offset, reg_val);
6878 
6879 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
6880 	WREG32(reg_base + reg_offset, reg_val);
6881 }
6882 
6883 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
6884 {
6885 	int i;
6886 
6887 	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
6888 		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
6889 
6890 	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6891 		gaudi2_arc_mmu_prepare(hdev, i, asid);
6892 
6893 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
6894 		if (!gaudi2_is_queue_enabled(hdev, i))
6895 			continue;
6896 
6897 		gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
6898 	}
6899 
6900 	return 0;
6901 }
6902 
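/*
 * Program the MMU bypass and ASID for the engines shared between Dcores:
 * PDMA, rotators, shared (PCIe) decoders, the ARC farm DUP engines and the
 * ARC cores themselves.
 */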
6903 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
6904 {
6905 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6906 	u32 rw_asid, offset;
6907 	int rc, i;
6908 
6909 	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
6910 			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
6911 
6912 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6913 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6914 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6915 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6916 
6917 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6918 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6919 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6920 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6921 
6922 	/* ROT */
6923 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
6924 		offset = i * ROT_OFFSET;
6925 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
6926 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6927 		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
6928 		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
6929 		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
6930 	}
6931 
6932 	/* Shared Decoders are the last bits in the decoders mask */
6933 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
6934 		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
6935 
6936 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
6937 		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
6938 
6939 	/* arc farm arc dup eng */
6940 	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6941 		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
6942 
6943 	rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
6944 	if (rc)
6945 		return rc;
6946 
6947 	return 0;
6948 }
6949 
6950 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst,	u32 offset,
6951 					struct iterate_module_ctx *ctx)
6952 {
6953 	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
6954 
6955 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
6956 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
6957 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6958 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
6959 }
6960 
6961 /* zero the MMUBP and set the ASID */
6962 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
6963 {
6964 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6965 	struct gaudi2_tpc_mmu_data tpc_mmu_data;
6966 	struct iterate_module_ctx tpc_iter = {
6967 		.fn = &gaudi2_tpc_mmu_prepare,
6968 		.data = &tpc_mmu_data,
6969 	};
6970 	int rc, i;
6971 
6972 	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
6973 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6974 		return -EINVAL;
6975 	}
6976 
6977 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
6978 		return 0;
6979 
6980 	rc = gaudi2_mmu_shared_prepare(hdev, asid);
6981 	if (rc)
6982 		return rc;
6983 
6984 	/* configure DCORE MMUs */
6985 	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6986 				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6987 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
6988 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
6989 		gaudi2_mmu_dcore_prepare(hdev, i, asid);
6990 
6991 	return 0;
6992 }
6993 
6994 static inline bool is_info_event(u32 event)
6995 {
6996 	switch (event) {
6997 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
6998 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
6999 
	/* Return true in case of a NIC status event - these events are received
	 * periodically and are not an indication of an error.
	 */
7003 	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7004 		return true;
7005 	default:
7006 		return false;
7007 	}
7008 }
7009 
7010 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7011 			bool ratelimited, const char *fmt, ...)
7012 {
7013 	struct va_format vaf;
7014 	va_list args;
7015 
7016 	va_start(args, fmt);
7017 	vaf.fmt = fmt;
7018 	vaf.va = &args;
7019 
7020 	if (ratelimited)
7021 		dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7022 			gaudi2_irq_map_table[event_type].valid ?
7023 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7024 	else
7025 		dev_err(hdev->dev, "%s: %pV\n",
7026 			gaudi2_irq_map_table[event_type].valid ?
7027 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7028 
7029 	va_end(args);
7030 }
7031 
7032 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7033 		struct hl_eq_ecc_data *ecc_data)
7034 {
7035 	u64 ecc_address = 0, ecc_syndrom = 0;
7036 	u8 memory_wrapper_idx = 0;
7037 
7038 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
7039 	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7040 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7041 
7042 	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7043 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.\n",
7044 		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
7045 
7046 	return !!ecc_data->is_critical;
7047 }
7048 
7049 /*
7050  * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7051  *
7052  * @idx: the current pi/ci value
7053  * @q_len: the queue length (power of 2)
7054  *
7055  * @return the cyclically decremented index
7056  */
7057 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
7058 {
7059 	u32 mask = q_len - 1;
7060 
7061 	/*
7062 	 * modular decrement is equivalent to adding (queue_size -1)
7063 	 * later we take LSBs to make sure the value is in the
7064 	 * range [0, queue_len - 1]
7065 	 */
7066 	return (idx + q_len - 1) & mask;
7067 }
7068 
7069 /**
7070  * gaudi2_print_sw_config_stream_data - print SW config stream data
7071  *
7072  * @hdev: pointer to the habanalabs device structure
7073  * @stream: the QMAN's stream
7074  * @qman_base: base address of QMAN registers block
7075  */
7076 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
7077 						u32 stream, u64 qman_base)
7078 {
7079 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7080 	u32 cq_ptr_lo_off, size;
7081 
7082 	cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
7083 
7084 	cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
7085 									stream * cq_ptr_lo_off;
7086 
7087 	cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7088 
7089 	cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7090 
7091 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7092 	size = RREG32(cq_tsize);
7093 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7094 		stream, cq_ptr, size);
7095 }
7096 
7097 /**
7098  * gaudi2_print_last_pqes_on_err - print last PQEs on error
7099  *
7100  * @hdev: pointer to the habanalabs device structure
7101  * @qid_base: first QID of the QMAN (out of 4 streams)
7102  * @stream: the QMAN's stream
7103  * @qman_base: base address of QMAN registers block
7104  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7105  */
7106 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
7107 						u64 qman_base, bool pr_sw_conf)
7108 {
7109 	u32 ci, qm_ci_stream_off;
7110 	struct hl_hw_queue *q;
7111 	u64 pq_ci;
7112 	int i;
7113 
7114 	q = &hdev->kernel_queues[qid_base + stream];
7115 
7116 	qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
7117 	pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
7118 						stream * qm_ci_stream_off;
7119 
7120 	hdev->asic_funcs->hw_queues_lock(hdev);
7121 
7122 	if (pr_sw_conf)
7123 		gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7124 
7125 	ci = RREG32(pq_ci);
7126 
	/* we should start printing from ci - 1 */
7128 	ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7129 
7130 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7131 		struct hl_bd *bd;
7132 		u64 addr;
7133 		u32 len;
7134 
7135 		bd = q->kernel_address;
7136 		bd += ci;
7137 
7138 		len = le32_to_cpu(bd->len);
		/* len 0 means an uninitialized entry - break */
7140 		if (!len)
7141 			break;
7142 
7143 		addr = le64_to_cpu(bd->ptr);
7144 
7145 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7146 			stream, ci, addr, len);
7147 
7148 		/* get previous ci, wrap if needed */
7149 		ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7150 	}
7151 
7152 	hdev->asic_funcs->hw_queues_unlock(hdev);
7153 }
7154 
7155 /**
7156  * print_qman_data_on_err - extract QMAN data on error
7157  *
7158  * @hdev: pointer to the habanalabs device structure
7159  * @qid_base: first QID of the QMAN (out of 4 streams)
7160  * @stream: the QMAN's stream
7161  * @qman_base: base address of QMAN registers block
7162  *
 * This function attempts to extract as much data as possible on a QMAN error.
 * For an upper CP, print the SW config stream data and the last 8 PQEs.
 * For the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
7166  */
7167 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
7168 {
7169 	u32 i;
7170 
7171 	if (stream != QMAN_STREAMS) {
7172 		gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
7173 		return;
7174 	}
7175 
7176 	gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7177 
7178 	for (i = 0 ; i < QMAN_STREAMS ; i++)
7179 		gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
7180 }
7181 
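/*
 * Decode a generic QMAN error: scan the GLBL_ERR_STS register of every
 * stream and of the lower CP, print the active error causes and the
 * relevant PQ entries, then check the arbiter error cause register.
 * Returns the total number of error causes found.
 */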
7182 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7183 							u64 qman_base, u32 qid_base)
7184 {
7185 	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7186 	u64 glbl_sts_addr, arb_err_addr;
7187 	char reg_desc[32];
7188 
7189 	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7190 	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7191 
7192 	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7193 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7194 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7195 
7196 		if (!glbl_sts_val)
7197 			continue;
7198 
7199 		if (i == QMAN_STREAMS) {
7200 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7201 			num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7202 		} else {
7203 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7204 			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7205 		}
7206 
7207 		for (j = 0 ; j < num_error_causes ; j++)
7208 			if (glbl_sts_val & BIT(j)) {
7209 				gaudi2_print_event(hdev, event_type, true,
7210 					"%s. err cause: %s", reg_desc,
7211 					i == QMAN_STREAMS ?
7212 					gaudi2_qman_lower_cp_error_cause[j] :
7213 					gaudi2_qman_error_cause[j]);
7214 				error_count++;
7215 			}
7216 
7217 		print_qman_data_on_err(hdev, qid_base, i, qman_base);
7218 	}
7219 
7220 	arb_err_val = RREG32(arb_err_addr);
7221 
7222 	if (!arb_err_val)
7223 		goto out;
7224 
7225 	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7226 		if (arb_err_val & BIT(j)) {
7227 			gaudi2_print_event(hdev, event_type, true,
7228 				"ARB_ERR. err cause: %s",
7229 				gaudi2_qman_arb_error_cause[j]);
7230 			error_count++;
7231 		}
7232 	}
7233 
7234 out:
7235 	return error_count;
7236 }
7237 
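/*
 * Report a HBW RAZWI captured by a shared range register: read the captured
 * address and initiator coordinates from the router master interface and
 * pass them to the common RAZWI handler.
 */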
7238 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7239 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7240 			enum gaudi2_engine_id id, u64 *event_mask)
7241 {
7242 	u32 razwi_hi, razwi_lo, razwi_xy;
7243 	u16 eng_id = id;
7244 	u8 rd_wr_flag;
7245 
7246 	if (is_write) {
7247 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7248 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7249 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7250 		rd_wr_flag = HL_RAZWI_WRITE;
7251 	} else {
7252 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7253 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7254 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7255 		rd_wr_flag = HL_RAZWI_READ;
7256 	}
7257 
7258 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7259 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7260 
7261 	dev_err_ratelimited(hdev->dev,
7262 		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7263 		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7264 }
7265 
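/*
 * Report a LBW RAZWI captured by a shared range register. The captured
 * address is an offset relative to CFG_BASE.
 */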
7266 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7267 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7268 			enum gaudi2_engine_id id, u64 *event_mask)
7269 {
7270 	u64 razwi_addr = CFG_BASE;
7271 	u32 razwi_xy;
7272 	u16 eng_id = id;
7273 	u8 rd_wr_flag;
7274 
7275 	if (is_write) {
7276 		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7277 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7278 		rd_wr_flag = HL_RAZWI_WRITE;
7279 	} else {
7280 		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7281 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7282 		rd_wr_flag = HL_RAZWI_READ;
7283 	}
7284 
7285 	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7286 	dev_err_ratelimited(hdev->dev,
7287 				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
7288 				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7289 						razwi_xy);
7290 }
7291 
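/*
 * Translate a RAZWI initiator, given as a module type and index, to its
 * global engine ID. Returns GAUDI2_ENGINE_ID_SIZE for an unknown module.
 */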
7292 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7293 						enum razwi_event_sources module, u8 module_idx)
7294 {
7295 	switch (module) {
7296 	case RAZWI_TPC:
7297 		if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7298 			return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7299 		return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7300 				(module_idx % NUM_OF_TPC_PER_DCORE) +
7301 				(GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7302 
7303 	case RAZWI_MME:
7304 		return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
7305 			(module_idx * ENGINE_ID_DCORE_OFFSET));
7306 
7307 	case RAZWI_EDMA:
7308 		return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7309 			(module_idx % NUM_OF_EDMA_PER_DCORE));
7310 
7311 	case RAZWI_PDMA:
7312 		return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
7313 
7314 	case RAZWI_NIC:
7315 		return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
7316 
7317 	case RAZWI_DEC:
7318 		if (module_idx == 8)
7319 			return GAUDI2_PCIE_ENGINE_ID_DEC_0;
7320 
		if (module_idx == 9)
			return GAUDI2_PCIE_ENGINE_ID_DEC_1;
7324 		return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7325 				(module_idx % NUM_OF_DEC_PER_DCORE) +
7326 				(GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7327 
7328 	case RAZWI_ROT:
7329 		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
7330 
7331 	default:
7332 		return GAUDI2_ENGINE_ID_SIZE;
7333 	}
7334 }
7335 
7336 /*
 * This function handles RR (Range Register) hit events raised by
 * initiators, as opposed to PSOC RAZWI events.
7339  */
7340 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
7341 				enum razwi_event_sources module, u8 module_idx,
7342 				u8 module_sub_idx, u64 *event_mask)
7343 {
7344 	bool via_sft = false;
7345 	u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id;
7346 	u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
7347 	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
7348 	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
7349 	char initiator_name[64];
7350 
7351 	switch (module) {
7352 	case RAZWI_TPC:
7353 		hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
7354 
7355 		/* TODO : remove this check and depend only on tpc routers table
7356 		 * when SW-118828 is resolved
7357 		 */
7358 		if (!hdev->asic_prop.fw_security_enabled &&
7359 				((module_idx == 0) || (module_idx == 1)))
7360 			lbw_rtr_id = DCORE0_RTR0;
7361 		else
7362 			lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
7363 		sprintf(initiator_name, "TPC_%u", module_idx);
7364 		break;
7365 	case RAZWI_MME:
7366 		sprintf(initiator_name, "MME_%u", module_idx);
7367 		switch (module_sub_idx) {
7368 		case MME_WAP0:
7369 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
7370 			break;
7371 		case MME_WAP1:
7372 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
7373 			break;
7374 		case MME_WRITE:
7375 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
7376 			break;
7377 		case MME_READ:
7378 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
7379 			break;
7380 		case MME_SBTE0:
7381 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
7382 			break;
7383 		case MME_SBTE1:
7384 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
7385 			break;
7386 		case MME_SBTE2:
7387 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
7388 			break;
7389 		case MME_SBTE3:
7390 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
7391 			break;
7392 		case MME_SBTE4:
7393 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
7394 			break;
7395 		default:
7396 			return;
7397 		}
7398 		lbw_rtr_id = hbw_rtr_id;
7399 		break;
7400 	case RAZWI_EDMA:
7401 		hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
7402 		dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
		/* The SFT has a separate MSTR_IF for LBW; only there can the
		 * LBW RAZWI related registers be read
		 */
7406 		lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
7407 								dcore_id * SFT_DCORE_OFFSET;
7408 		via_sft = true;
7409 		sprintf(initiator_name, "EDMA_%u", module_idx);
7410 		break;
7411 	case RAZWI_PDMA:
7412 		hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
7413 		lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
7414 		sprintf(initiator_name, "PDMA_%u", module_idx);
7415 		break;
7416 	case RAZWI_NIC:
7417 		hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
7418 		lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
7419 		sprintf(initiator_name, "NIC_%u", module_idx);
7420 		break;
7421 	case RAZWI_DEC:
7422 		hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
7423 		lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
7424 		sprintf(initiator_name, "DEC_%u", module_idx);
7425 		break;
7426 	case RAZWI_ROT:
7427 		hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
7428 		lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
7429 		sprintf(initiator_name, "ROT_%u", module_idx);
7430 		break;
7431 	default:
7432 		return;
7433 	}
7434 
7435 	/* Find router mstr_if register base */
7436 	if (!via_sft) {
7437 		dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
7438 		dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
7439 		hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
7440 				dcore_id * DCORE_OFFSET +
7441 				dcore_rtr_id * DCORE_RTR_OFFSET +
7442 				RTR_MSTR_IF_OFFSET;
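		/* The LBW master IF follows the same per-router layout, so its base
		 * is derived from the HBW base using the router-id delta
		 */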
7443 		lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
7444 				(((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
7445 	}
7446 
7447 	/* Find out event cause by reading "RAZWI_HAPPENED" registers */
7448 	hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
7449 	hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
7450 	lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7451 	lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7452 
7453 	eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
7454 	if (hbw_shrd_aw) {
7455 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
7456 						initiator_name, eng_id, event_mask);
7457 
7458 		/* Clear event indication */
7459 		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
7460 	}
7461 
7462 	if (hbw_shrd_ar) {
7463 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
7464 						initiator_name, eng_id, event_mask);
7465 
7466 		/* Clear event indication */
7467 		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
7468 	}
7469 
7470 	if (lbw_shrd_aw) {
7471 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
7472 						initiator_name, eng_id, event_mask);
7473 
7474 		/* Clear event indication */
7475 		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
7476 	}
7477 
7478 	if (lbw_shrd_ar) {
7479 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
7480 						initiator_name, eng_id, event_mask);
7481 
7482 		/* Clear event indication */
7483 		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
7484 	}
7485 }
7486 
7487 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
7488 {
7489 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7490 	u8 mod_idx, sub_mod;
7491 
7492 	/* check all TPCs */
7493 	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
7494 		if (prop->tpc_enabled_mask & BIT(mod_idx))
7495 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
7496 	}
7497 
7498 	/* check all MMEs */
7499 	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7500 		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
7501 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
7502 									sub_mod, NULL);
7503 
7504 	/* check all EDMAs */
7505 	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7506 		if (prop->edma_enabled_mask & BIT(mod_idx))
7507 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
7508 
7509 	/* check all PDMAs */
7510 	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
7511 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
7512 
7513 	/* check all NICs */
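	/* RAZWI is reported per NIC macro (two ports each), hence mod_idx >> 1 */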
7514 	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
7515 		if (hdev->nic_ports_mask & BIT(mod_idx))
7516 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
7517 								NULL);
7518 
7519 	/* check all DECs */
7520 	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
7521 		if (prop->decoder_enabled_mask & BIT(mod_idx))
7522 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
7523 
7524 	/* check all ROTs */
7525 	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
7526 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
7527 }
7528 
7529 static const char *gaudi2_get_initiators_name(u32 rtr_id)
7530 {
7531 	switch (rtr_id) {
7532 	case DCORE0_RTR0:
7533 		return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU";
7534 	case DCORE0_RTR1:
7535 		return "TPC0/1";
7536 	case DCORE0_RTR2:
7537 		return "TPC2/3";
7538 	case DCORE0_RTR3:
7539 		return "TPC4/5";
7540 	case DCORE0_RTR4:
7541 		return "MME0_SBTE0/1";
7542 	case DCORE0_RTR5:
7543 		return "MME0_WAP0/SBTE2";
7544 	case DCORE0_RTR6:
7545 		return "MME0_CTRL_WR/SBTE3";
7546 	case DCORE0_RTR7:
7547 		return "MME0_WAP1/CTRL_RD/SBTE4";
7548 	case DCORE1_RTR0:
7549 		return "MME1_WAP1/CTRL_RD/SBTE4";
7550 	case DCORE1_RTR1:
7551 		return "MME1_CTRL_WR/SBTE3";
7552 	case DCORE1_RTR2:
7553 		return "MME1_WAP0/SBTE2";
7554 	case DCORE1_RTR3:
7555 		return "MME1_SBTE0/1";
7556 	case DCORE1_RTR4:
7557 		return "TPC10/11";
7558 	case DCORE1_RTR5:
7559 		return "TPC8/9";
7560 	case DCORE1_RTR6:
7561 		return "TPC6/7";
7562 	case DCORE1_RTR7:
7563 		return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7";
7564 	case DCORE2_RTR0:
7565 		return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0";
7566 	case DCORE2_RTR1:
7567 		return "TPC16/17";
7568 	case DCORE2_RTR2:
7569 		return "TPC14/15";
7570 	case DCORE2_RTR3:
7571 		return "TPC12/13";
7572 	case DCORE2_RTR4:
7573 		return "MME2_SBTE0/1";
7574 	case DCORE2_RTR5:
7575 		return "MME2_WAP0/SBTE2";
7576 	case DCORE2_RTR6:
7577 		return "MME2_CTRL_WR/SBTE3";
7578 	case DCORE2_RTR7:
7579 		return "MME2_WAP1/CTRL_RD/SBTE4";
7580 	case DCORE3_RTR0:
7581 		return "MME3_WAP1/CTRL_RD/SBTE4";
7582 	case DCORE3_RTR1:
7583 		return "MME3_CTRL_WR/SBTE3";
7584 	case DCORE3_RTR2:
7585 		return "MME3_WAP0/SBTE2";
7586 	case DCORE3_RTR3:
7587 		return "MME3_SBTE0/1";
7588 	case DCORE3_RTR4:
7589 		return "TPC18/19";
7590 	case DCORE3_RTR5:
7591 		return "TPC20/21";
7592 	case DCORE3_RTR6:
7593 		return "TPC22/23";
7594 	case DCORE3_RTR7:
7595 		return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC";
	default:
		return "N/A";
7598 	}
7599 }
7600 
7601 static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines)
7602 {
7603 	switch (rtr_id) {
7604 	case DCORE0_RTR0:
7605 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0;
7606 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1;
7607 		engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0;
7608 		engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1;
7609 		engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7610 		engines[5] = GAUDI2_ENGINE_ID_PDMA_0;
7611 		engines[6] = GAUDI2_ENGINE_ID_PDMA_1;
7612 		engines[7] = GAUDI2_ENGINE_ID_PCIE;
7613 		engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
7614 		engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
7615 		engines[10] = GAUDI2_ENGINE_ID_PSOC;
7616 		return 11;
7617 
7618 	case DCORE0_RTR1:
7619 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0;
7620 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1;
7621 		return 2;
7622 
7623 	case DCORE0_RTR2:
7624 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2;
7625 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3;
7626 		return 2;
7627 
7628 	case DCORE0_RTR3:
7629 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4;
7630 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5;
7631 		return 2;
7632 
7633 	case DCORE0_RTR4:
7634 	case DCORE0_RTR5:
7635 	case DCORE0_RTR6:
7636 	case DCORE0_RTR7:
7637 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME;
7638 		return 1;
7639 
7640 	case DCORE1_RTR0:
7641 	case DCORE1_RTR1:
7642 	case DCORE1_RTR2:
7643 	case DCORE1_RTR3:
7644 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME;
7645 		return 1;
7646 
7647 	case DCORE1_RTR4:
7648 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4;
7649 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5;
7650 		return 2;
7651 
7652 	case DCORE1_RTR5:
7653 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2;
7654 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3;
7655 		return 2;
7656 
7657 	case DCORE1_RTR6:
7658 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0;
7659 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1;
7660 		return 2;
7661 
7662 	case DCORE1_RTR7:
7663 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0;
7664 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1;
7665 		engines[2] = GAUDI2_ENGINE_ID_NIC0_0;
7666 		engines[3] = GAUDI2_ENGINE_ID_NIC1_0;
7667 		engines[4] = GAUDI2_ENGINE_ID_NIC2_0;
7668 		engines[5] = GAUDI2_ENGINE_ID_NIC3_0;
7669 		engines[6] = GAUDI2_ENGINE_ID_NIC4_0;
7670 		engines[7] = GAUDI2_ENGINE_ID_ARC_FARM;
7671 		engines[8] = GAUDI2_ENGINE_ID_KDMA;
7672 		engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
7673 		engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
7674 		return 11;
7675 
7676 	case DCORE2_RTR0:
7677 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0;
7678 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1;
7679 		engines[2] = GAUDI2_ENGINE_ID_NIC5_0;
7680 		engines[3] = GAUDI2_ENGINE_ID_NIC6_0;
7681 		engines[4] = GAUDI2_ENGINE_ID_NIC7_0;
7682 		engines[5] = GAUDI2_ENGINE_ID_NIC8_0;
7683 		engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
7684 		engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
7685 		engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7686 		return 9;
7687 
7688 	case DCORE2_RTR1:
7689 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4;
7690 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5;
7691 		return 2;
7692 
7693 	case DCORE2_RTR2:
7694 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2;
7695 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3;
7696 		return 2;
7697 
7698 	case DCORE2_RTR3:
7699 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0;
7700 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1;
7701 		return 2;
7702 
7703 	case DCORE2_RTR4:
7704 	case DCORE2_RTR5:
7705 	case DCORE2_RTR6:
7706 	case DCORE2_RTR7:
7707 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME;
7708 		return 1;
7709 	case DCORE3_RTR0:
7710 	case DCORE3_RTR1:
7711 	case DCORE3_RTR2:
7712 	case DCORE3_RTR3:
7713 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME;
7714 		return 1;
7715 	case DCORE3_RTR4:
7716 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0;
7717 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1;
7718 		return 2;
7719 	case DCORE3_RTR5:
7720 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2;
7721 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3;
7722 		return 2;
7723 	case DCORE3_RTR6:
7724 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4;
7725 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5;
7726 		return 2;
7727 	case DCORE3_RTR7:
7728 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0;
7729 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1;
7730 		engines[2] = GAUDI2_ENGINE_ID_NIC9_0;
7731 		engines[3] = GAUDI2_ENGINE_ID_NIC10_0;
7732 		engines[4] = GAUDI2_ENGINE_ID_NIC11_0;
7733 		engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
7734 		engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
7735 		engines[7] = GAUDI2_ENGINE_ID_ROT_1;
7736 		engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7737 		return 9;
7738 	default:
7739 		return 0;
7740 	}
7741 }
7742 
7743 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7744 							u64 rtr_ctrl_base_addr, bool is_write,
7745 							u64 *event_mask)
7746 {
7747 	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7748 	u32 razwi_hi, razwi_lo;
7749 	u8 rd_wr_flag;
7750 
7751 	num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7752 
7753 	if (is_write) {
7754 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI);
7755 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO);
7756 		rd_wr_flag = HL_RAZWI_WRITE;
7757 
7758 		/* Clear set indication */
7759 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1);
7760 	} else {
7761 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI);
7762 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO);
7763 		rd_wr_flag = HL_RAZWI_READ;
7764 
7765 		/* Clear set indication */
7766 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
7767 	}
7768 
7769 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng,
7770 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7771 	dev_err_ratelimited(hdev->dev,
7772 		"RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
7773 		is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
7774 
7775 	dev_err_ratelimited(hdev->dev,
7776 		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7777 }
7778 
7779 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7780 							u64 rtr_ctrl_base_addr, bool is_write,
7781 							u64 *event_mask)
7782 {
7783 	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7784 	u64 razwi_addr = CFG_BASE;
7785 	u8 rd_wr_flag;
7786 
7787 	num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7788 
7789 	if (is_write) {
7790 		razwi_addr += RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
7791 		rd_wr_flag = HL_RAZWI_WRITE;
7792 
7793 		/* Clear set indication */
7794 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
7795 	} else {
7796 		razwi_addr += RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
7797 		rd_wr_flag = HL_RAZWI_READ;
7798 
7799 		/* Clear set indication */
7800 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
7801 	}
7802 
7803 	hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW,
7804 			event_mask);
7805 	dev_err_ratelimited(hdev->dev,
7806 		"RAZWI PSOC unmapped LBW %s error, rtr id %u, address 0x%llX\n",
7807 		is_write ? "WR" : "RD", rtr_id, razwi_addr);
7808 
7809 	dev_err_ratelimited(hdev->dev,
7810 		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7811 }
7812 
7813 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
7814 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
7815 {
7816 	u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy,
7817 						razwi_mask_info, razwi_intr = 0, error_count = 0;
7818 	int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES;
7819 	u64 rtr_ctrl_base_addr;
7820 
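	/* The RAZWI interrupt register is read (and later cleared) only on pldm
	 * or when the f/w doesn't handle interrupts
	 */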
7821 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
7822 		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
7823 		if (!razwi_intr)
7824 			return 0;
7825 	}
7826 
7827 	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
7828 	xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info);
7829 
7830 	dev_err_ratelimited(hdev->dev,
7831 		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
7832 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
7833 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
7834 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
7835 		xy,
7836 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
7837 
7838 	if (xy == 0) {
7839 		dev_err_ratelimited(hdev->dev,
7840 				"PSOC RAZWI interrupt: received event from 0 rtr coordinates\n");
7841 		goto clear;
7842 	}
7843 
7844 	/* Find router id by router coordinates */
7845 	for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++)
7846 		if (rtr_coordinates_to_rtr_id[rtr_id] == xy)
7847 			break;
7848 
7849 	if (rtr_id == rtr_map_arr_len) {
7850 		dev_err_ratelimited(hdev->dev,
7851 				"PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy);
7852 		goto clear;
7853 	}
7854 
7855 	/* Find router mstr_if register base */
7856 	dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7857 	dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7858 	rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET +
7859 				dcore_rtr_id * DCORE_RTR_OFFSET;
7860 
7861 	hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET);
7862 	hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET);
7863 	lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET);
7864 	lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET);
7865 
7866 	if (hbw_aw_set)
7867 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7868 						rtr_ctrl_base_addr, true, event_mask);
7869 
7870 	if (hbw_ar_set)
7871 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7872 						rtr_ctrl_base_addr, false, event_mask);
7873 
7874 	if (lbw_aw_set)
7875 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7876 						rtr_ctrl_base_addr, true, event_mask);
7877 
7878 	if (lbw_ar_set)
7879 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7880 						rtr_ctrl_base_addr, false, event_mask);
7881 
7882 	error_count++;
7883 
7884 clear:
7885 	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
7886 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
7887 		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
7888 
7889 	return error_count;
7890 }
7891 
7892 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
7893 {
7894 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
7895 
7896 	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
7897 
7898 	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
7899 		if (sts_val & BIT(i)) {
7900 			gaudi2_print_event(hdev, event_type, true,
7901 				"err cause: %s", gaudi2_qm_sei_error_cause[i]);
7902 			sts_clr_val |= BIT(i);
7903 			error_count++;
7904 		}
7905 	}
7906 
7907 	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
7908 
7909 	return error_count;
7910 }
7911 
7912 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
7913 					bool extended_err_check, u64 *event_mask)
7914 {
7915 	enum razwi_event_sources module;
7916 	u32 error_count = 0;
7917 	u64 qman_base;
7918 	u8 index;
7919 
7920 	switch (event_type) {
7921 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
7922 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
7923 		qman_base = mmDCORE0_TPC0_QM_BASE +
7924 				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
7925 				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
7926 		module = RAZWI_TPC;
7927 		break;
7928 	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
7929 		qman_base = mmDCORE0_TPC6_QM_BASE;
7930 		module = RAZWI_TPC;
7931 		break;
7932 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
7933 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
7934 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
7935 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
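		/* Normalize to a 0-based MME index using the per-MME event stride */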
7936 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
7937 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
7938 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
7939 		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
7940 		module = RAZWI_MME;
7941 		break;
7942 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
7943 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
7944 		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
7945 		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
7946 		module = RAZWI_PDMA;
7947 		break;
7948 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
7949 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
7950 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
7951 		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
7952 		module = RAZWI_ROT;
7953 		break;
7954 	default:
7955 		return 0;
7956 	}
7957 
7958 	error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
7959 
	/* There is a single event per NIC macro, so we should check both of its QMAN blocks */
7961 	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
7962 			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
7963 		error_count += _gaudi2_handle_qm_sei_err(hdev,
7964 					qman_base + NIC_QM_OFFSET, event_type);
7965 
7966 	if (extended_err_check) {
7967 		/* check if RAZWI happened */
7968 		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
7969 		hl_check_for_glbl_errors(hdev);
7970 	}
7971 
7972 	return error_count;
7973 }
7974 
7975 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7976 {
7977 	u32 qid_base, error_count = 0;
7978 	u64 qman_base;
7979 	u8 index;
7980 
7981 	switch (event_type) {
7982 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
7983 		index = event_type - GAUDI2_EVENT_TPC0_QM;
7984 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
7985 		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7986 		break;
7987 	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
7988 		index = event_type - GAUDI2_EVENT_TPC6_QM;
7989 		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
7990 		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7991 		break;
7992 	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
7993 		index = event_type - GAUDI2_EVENT_TPC12_QM;
7994 		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
7995 		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7996 		break;
7997 	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
7998 		index = event_type - GAUDI2_EVENT_TPC18_QM;
7999 		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
8000 		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8001 		break;
8002 	case GAUDI2_EVENT_TPC24_QM:
8003 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
8004 		qman_base = mmDCORE0_TPC6_QM_BASE;
8005 		break;
8006 	case GAUDI2_EVENT_MME0_QM:
8007 		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
8008 		qman_base = mmDCORE0_MME_QM_BASE;
8009 		break;
8010 	case GAUDI2_EVENT_MME1_QM:
8011 		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
8012 		qman_base = mmDCORE1_MME_QM_BASE;
8013 		break;
8014 	case GAUDI2_EVENT_MME2_QM:
8015 		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
8016 		qman_base = mmDCORE2_MME_QM_BASE;
8017 		break;
8018 	case GAUDI2_EVENT_MME3_QM:
8019 		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
8020 		qman_base = mmDCORE3_MME_QM_BASE;
8021 		break;
8022 	case GAUDI2_EVENT_HDMA0_QM:
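		/* index is used below for the EDMA QM SEI and RAZWI handling */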
8023 		index = 0;
8024 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
8025 		qman_base = mmDCORE0_EDMA0_QM_BASE;
8026 		break;
8027 	case GAUDI2_EVENT_HDMA1_QM:
8028 		index = 1;
8029 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
8030 		qman_base = mmDCORE0_EDMA1_QM_BASE;
8031 		break;
8032 	case GAUDI2_EVENT_HDMA2_QM:
8033 		index = 2;
8034 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
8035 		qman_base = mmDCORE1_EDMA0_QM_BASE;
8036 		break;
8037 	case GAUDI2_EVENT_HDMA3_QM:
8038 		index = 3;
8039 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
8040 		qman_base = mmDCORE1_EDMA1_QM_BASE;
8041 		break;
8042 	case GAUDI2_EVENT_HDMA4_QM:
8043 		index = 4;
8044 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
8045 		qman_base = mmDCORE2_EDMA0_QM_BASE;
8046 		break;
8047 	case GAUDI2_EVENT_HDMA5_QM:
8048 		index = 5;
8049 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
8050 		qman_base = mmDCORE2_EDMA1_QM_BASE;
8051 		break;
8052 	case GAUDI2_EVENT_HDMA6_QM:
8053 		index = 6;
8054 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
8055 		qman_base = mmDCORE3_EDMA0_QM_BASE;
8056 		break;
8057 	case GAUDI2_EVENT_HDMA7_QM:
8058 		index = 7;
8059 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
8060 		qman_base = mmDCORE3_EDMA1_QM_BASE;
8061 		break;
8062 	case GAUDI2_EVENT_PDMA0_QM:
8063 		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
8064 		qman_base = mmPDMA0_QM_BASE;
8065 		break;
8066 	case GAUDI2_EVENT_PDMA1_QM:
8067 		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
8068 		qman_base = mmPDMA1_QM_BASE;
8069 		break;
8070 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
8071 		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
8072 		qman_base = mmROT0_QM_BASE;
8073 		break;
8074 	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8075 		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
8076 		qman_base = mmROT1_QM_BASE;
8077 		break;
8078 	default:
8079 		return 0;
8080 	}
8081 
8082 	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
8083 
8084 	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
8085 	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
8086 		error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8087 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
8088 	}
8089 
8090 	hl_check_for_glbl_errors(hdev);
8091 
8092 	return error_count;
8093 }
8094 
8095 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
8096 {
8097 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8098 
8099 	sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS);
8100 
8101 	for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
8102 		if (sts_val & BIT(i)) {
8103 			gaudi2_print_event(hdev, event_type, true,
8104 				"err cause: %s", gaudi2_arc_sei_error_cause[i]);
8105 			sts_clr_val |= BIT(i);
8106 			error_count++;
8107 		}
8108 	}
8109 
8110 	hl_check_for_glbl_errors(hdev);
8111 
8112 	WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val);
8113 
8114 	return error_count;
8115 }
8116 
8117 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
8118 {
8119 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8120 
8121 	sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8122 
8123 	for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8124 		if (sts_val & BIT(i)) {
8125 			gaudi2_print_event(hdev, event_type, true,
8126 				"err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8127 			sts_clr_val |= BIT(i);
8128 			error_count++;
8129 		}
8130 	}
8131 
8132 	hl_check_for_glbl_errors(hdev);
8133 
8134 	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8135 
8136 	return error_count;
8137 }
8138 
8139 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8140 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8141 					u64 *event_mask)
8142 {
8143 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8144 	u32 error_count = 0;
8145 	int i;
8146 
8147 	for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8148 		if (intr_cause_data & BIT(i)) {
8149 			gaudi2_print_event(hdev, event_type, true,
8150 				"err cause: %s", guadi2_rot_error_cause[i]);
8151 			error_count++;
8152 		}
8153 
8154 	/* check if RAZWI happened */
8155 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
8156 	hl_check_for_glbl_errors(hdev);
8157 
8158 	return error_count;
8159 }
8160 
static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type,
8162 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8163 					u64 *event_mask)
8164 {
8165 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8166 	u32 error_count = 0;
8167 	int i;
8168 
8169 	for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8170 		if (intr_cause_data & BIT(i)) {
8171 			gaudi2_print_event(hdev, event_type, true,
8172 				"interrupt cause: %s",  gaudi2_tpc_interrupts_cause[i]);
8173 			error_count++;
8174 		}
8175 
8176 	/* check if RAZWI happened */
8177 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
8178 	hl_check_for_glbl_errors(hdev);
8179 
8180 	return error_count;
8181 }
8182 
8183 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8184 					u64 *event_mask)
8185 {
8186 	u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8187 	int i;
8188 
8189 	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8190 		/* DCORE DEC */
8191 		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8192 				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8193 				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8194 	else
8195 		/* PCIE DEC */
8196 		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8197 				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8198 
8199 	sts_val = RREG32(sts_addr);
8200 
8201 	for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8202 		if (sts_val & BIT(i)) {
8203 			gaudi2_print_event(hdev, event_type, true,
8204 				"err cause: %s", gaudi2_dec_error_cause[i]);
8205 			sts_clr_val |= BIT(i);
8206 			error_count++;
8207 		}
8208 	}
8209 
8210 	/* check if RAZWI happened */
8211 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
8212 	hl_check_for_glbl_errors(hdev);
8213 
8214 	/* Write 1 clear errors */
8215 	WREG32(sts_addr, sts_clr_val);
8216 
8217 	return error_count;
8218 }
8219 
8220 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8221 					u64 *event_mask)
8222 {
8223 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8224 	int i;
8225 
8226 	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8227 	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8228 
8229 	sts_val = RREG32(sts_addr);
8230 
8231 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8232 		if (sts_val & BIT(i)) {
8233 			gaudi2_print_event(hdev, event_type, true,
8234 				"err cause: %s", guadi2_mme_error_cause[i]);
8235 			sts_clr_val |= BIT(i);
8236 			error_count++;
8237 		}
8238 	}
8239 
8240 	/* check if RAZWI happened */
8241 	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8242 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);
8243 
8244 	hl_check_for_glbl_errors(hdev);
8245 
8246 	WREG32(sts_clr_addr, sts_clr_val);
8247 
8248 	return error_count;
8249 }
8250 
8251 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
8252 					u64 intr_cause_data)
8253 {
8254 	int i, error_count = 0;
8255 
8256 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8257 		if (intr_cause_data & BIT(i)) {
8258 			gaudi2_print_event(hdev, event_type, true,
8259 				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
8260 			error_count++;
8261 		}
8262 
8263 	hl_check_for_glbl_errors(hdev);
8264 
8265 	return error_count;
8266 }
8267 
8268 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8269 					u64 *event_mask)
8270 {
8271 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8272 	int i;
8273 
8274 	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8275 	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8276 
8277 	sts_val = RREG32(sts_addr);
8278 
8279 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8280 		if (sts_val & BIT(i)) {
8281 			gaudi2_print_event(hdev, event_type, true,
8282 				"err cause: %s", guadi2_mme_wap_error_cause[i]);
8283 			sts_clr_val |= BIT(i);
8284 			error_count++;
8285 		}
8286 	}
8287 
8288 	/* check if RAZWI happened on WAP0/1 */
8289 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
8290 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
8291 	hl_check_for_glbl_errors(hdev);
8292 
8293 	WREG32(sts_clr_addr, sts_clr_val);
8294 
8295 	return error_count;
8296 }
8297 
8298 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8299 					u64 intr_cause_data)
8300 {
8301 	u32 error_count = 0;
8302 	int i;
8303 
	/* If an AXI read or write error is received, an error is reported and an
	 * interrupt message is sent. Due to a HW erratum, when reading the cause
	 * register of the KDMA engine, the reported error is always HBW, even if
	 * the actual error was caused by an LBW KDMA transaction.
	 */
8309 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8310 		if (intr_cause_data & BIT(i)) {
8311 			gaudi2_print_event(hdev, event_type, true,
8312 				"err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8313 			error_count++;
8314 		}
8315 
8316 	hl_check_for_glbl_errors(hdev);
8317 
8318 	return error_count;
8319 }
8320 
8321 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type,
8322 					u64 intr_cause_data)
8323 {
8324 	u32 error_count = 0;
8325 	int i;
8326 
8327 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8328 		if (intr_cause_data & BIT(i)) {
8329 			gaudi2_print_event(hdev, event_type, true,
8330 				"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8331 			error_count++;
8332 		}
8333 
8334 	hl_check_for_glbl_errors(hdev);
8335 
8336 	return error_count;
8337 }
8338 
8339 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8340 {
8341 	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8342 
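	/* Check and clear each of the four RAZWI_HAPPENED indications:
	 * HBW/LBW crossed with write (AW) and read (AR)
	 */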
8343 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8344 	if (RREG32(razwi_happened_addr)) {
8345 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8346 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8347 		WREG32(razwi_happened_addr, 0x1);
8348 	}
8349 
8350 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8351 	if (RREG32(razwi_happened_addr)) {
8352 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8353 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8354 		WREG32(razwi_happened_addr, 0x1);
8355 	}
8356 
8357 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8358 	if (RREG32(razwi_happened_addr)) {
8359 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8360 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8361 		WREG32(razwi_happened_addr, 0x1);
8362 	}
8363 
8364 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8365 	if (RREG32(razwi_happened_addr)) {
8366 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8367 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8368 		WREG32(razwi_happened_addr, 0x1);
8369 	}
8370 }
8371 
8372 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8373 					u64 intr_cause_data, u64 *event_mask)
8374 {
8375 	u32 error_count = 0;
8376 	int i;
8377 
8378 	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8379 		if (!(intr_cause_data & BIT_ULL(i)))
8380 			continue;
8381 
8382 		gaudi2_print_event(hdev, event_type, true,
8383 			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8384 		error_count++;
8385 
8386 		switch (intr_cause_data & BIT_ULL(i)) {
8387 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8388 			hl_check_for_glbl_errors(hdev);
8389 			break;
8390 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8391 			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8392 			break;
8393 		}
8394 	}
8395 
8396 	return error_count;
8397 }
8398 
8399 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
				u64 intr_cause_data)
{
8403 	u32 error_count = 0;
8404 	int i;
8405 
8406 	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8407 		if (intr_cause_data & BIT_ULL(i)) {
8408 			gaudi2_print_event(hdev, event_type, true,
8409 				"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8410 			error_count++;
8411 		}
8412 	}
8413 
8414 	return error_count;
8415 }
8416 
8417 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8418 {
8419 	u32 error_count = 0;
8420 	int i;
8421 
8422 	for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8423 		if (intr_cause_data & BIT_ULL(i)) {
8424 			gaudi2_print_event(hdev, event_type, true,
8425 				"err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8426 			error_count++;
8427 		}
8428 	}
8429 
8430 	return error_count;
8431 }
8432 
8433 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8434 					u64 *event_mask)
8435 {
8436 	u32 valid, val, axid_l, axid_h;
8437 	u64 addr;
8438 
8439 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8440 
8441 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8442 		return;
8443 
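	/* The upper VA bits are captured in PAGE_ERROR_CAPTURE and the lower
	 * 32 bits in PAGE_ERROR_CAPTURE_VA
	 */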
8444 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8445 	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8446 	addr <<= 32;
8447 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8448 
8449 	axid_l = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_LSB));
8450 	axid_h = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_MSB));
8451 
8452 	dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx, transaction id 0x%llX\n",
8453 				is_pmmu ? "PMMU" : "HMMU", addr, ((u64)axid_h << 32) + axid_l);
8454 	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8455 
8456 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0);
8457 }
8458 
8459 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8460 {
8461 	u32 valid, val;
8462 	u64 addr;
8463 
8464 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8465 
8466 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8467 		return;
8468 
8469 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8470 	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8471 	addr <<= 32;
8472 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8473 
8474 	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8475 				is_pmmu ? "PMMU" : "HMMU", addr);
8476 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0);
8477 }
8478 
8479 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
8480 						u64 mmu_base, bool is_pmmu, u64 *event_mask)
8481 {
8482 	u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
8483 	int i;
8484 
8485 	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8486 
8487 	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8488 		if (spi_sei_cause & BIT(i)) {
8489 			gaudi2_print_event(hdev, event_type, true,
8490 				"err cause: %s", gaudi2_mmu_spi_sei[i].cause);
8491 
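			/* Cause bit 0 indicates a page fault, bit 1 an access error */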
8492 			if (i == 0)
8493 				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
8494 			else if (i == 1)
8495 				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8496 
8497 			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8498 				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8499 
8500 			error_count++;
8501 		}
8502 	}
8503 
8504 	/* Clear cause */
8505 	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8506 
8507 	/* Clear interrupt */
8508 	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8509 
8510 	return error_count;
8511 }
8512 
8513 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
8514 {
8515 	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
8516 		cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
8517 	int i;
8518 
8519 	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
8520 	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
8521 
8522 	sei_cause_val = RREG32(sei_cause_addr);
8523 	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
8524 	cq_intr_val = RREG32(cq_intr_addr);
8525 
8526 	/* SEI interrupt */
8527 	if (sei_cause_cause) {
8528 		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
8529 		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
8530 					sei_cause_val);
8531 
8532 		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
8533 			if (!(sei_cause_cause & BIT(i)))
8534 				continue;
8535 
8536 			gaudi2_print_event(hdev, event_type, true,
8537 				"err cause: %s. %s: 0x%X\n",
8538 				gaudi2_sm_sei_cause[i].cause_name,
8539 				gaudi2_sm_sei_cause[i].log_name,
8540 				sei_cause_log);
8541 			error_count++;
8542 			break;
8543 		}
8544 
8545 		/* Clear SM_SEI_CAUSE */
8546 		WREG32(sei_cause_addr, 0);
8547 	}
8548 
8549 	/* CQ interrupt */
8550 	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
8551 		cq_intr_queue_index =
8552 				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
8553 					cq_intr_val);
8554 
8555 		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
8556 				sm_index, cq_intr_queue_index);
8557 		error_count++;
8558 
8559 		/* Clear CQ_INTR */
8560 		WREG32(cq_intr_addr, 0);
8561 	}
8562 
8563 	hl_check_for_glbl_errors(hdev);
8564 
8565 	return error_count;
8566 }
8567 
8568 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8569 {
8570 	bool is_pmmu = false;
8571 	u32 error_count = 0;
8572 	u64 mmu_base;
8573 	u8 index;
8574 
8575 	switch (event_type) {
8576 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
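		/* 3 events are grouped per HMMU in this range, hence the divide by 3 */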
8577 		index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3;
8578 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8579 		break;
8580 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
8581 		index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP);
8582 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8583 		break;
8584 	case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
8585 		index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3;
8586 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8587 		break;
8588 	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
8589 		index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP);
8590 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8591 		break;
8592 	case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
8593 		index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3;
8594 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8595 		break;
8596 	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
8597 		index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP);
8598 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8599 		break;
8600 	case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8601 		index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3;
8602 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8603 		break;
8604 	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8605 		index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP);
8606 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8607 		break;
8608 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8609 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8610 		is_pmmu = true;
8611 		mmu_base = mmPMMU_HBW_MMU_BASE;
8612 		break;
8613 	default:
8614 		return 0;
8615 	}
8616 
8617 	error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
8618 							is_pmmu, event_mask);
8619 	hl_check_for_glbl_errors(hdev);
8620 
8621 	return error_count;
8622 }
8623 
8624 
8625 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
8626 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
8627 			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
8628 {
8629 	u32 addr, beat, beat_shift;
8630 	bool rc = false;
8631 
8632 	dev_err_ratelimited(hdev->dev,
8633 			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
8634 			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
8635 			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
8636 			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
8637 
8638 	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
8639 	dev_err_ratelimited(hdev->dev,
8640 			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
8641 			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
8642 			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
8643 			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
8644 			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
8645 			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
8646 
8647 	/* For each beat (RDQS edge), look for possible errors and print relevant info */
8648 	for (beat = 0 ; beat < 4 ; beat++) {
8649 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8650 			(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
8651 			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
8652 						beat,
8653 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8654 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8655 
8656 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8657 			(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
8658 			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
8659 						beat,
8660 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8661 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8662 			rc |= true;
8663 		}
8664 
8665 		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
8666 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8667 			(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
8668 			dev_err_ratelimited(hdev->dev,
8669 					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
8670 					beat,
8671 					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8672 					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8673 						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
8674 						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
8675 			rc |= true;
8676 		}
8677 
8678 		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
8679 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8680 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
8681 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8682 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
8683 	}
8684 
8685 	return rc;
8686 }
8687 
8688 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
8689 			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
8690 {
8691 	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
8692 	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
8693 
8694 	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
8695 
8696 	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
8697 				derr & 0x3, derr & 0xc);
8698 
8699 	/* JIRA H6-3286 - the following prints may not be valid */
8700 	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
8701 	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
8702 		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
8703 		dev_err_ratelimited(hdev->dev,
8704 				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
8705 				i,
8706 				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
8707 				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
8708 				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
8709 				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
8710 	}
8711 }
8712 
8713 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
8714 		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
8715 {
8716 	__le32 *col_cmd = ca_par_err_data->dbg_col;
8717 	__le16 *row_cmd = ca_par_err_data->dbg_row;
8718 	u32 i;
8719 
8720 	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
8721 
8722 	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
8723 	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
8724 		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
8725 			le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
8726 			le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
8727 }
8728 
8729 /* Returns true if hard reset is needed or false otherwise */
8730 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
8731 					struct hl_eq_hbm_sei_data *sei_data)
8732 {
8733 	bool require_hard_reset = false;
8734 	u32 hbm_id, mc_id, cause_idx;
8735 
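	/* Event IDs are grouped as 4 per HBM (2 MCs, 2 SEI events each) */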
8736 	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
8737 	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
8738 
8739 	cause_idx = sei_data->hdr.sei_cause;
8740 	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
		gaudi2_print_event(hdev, event_type, true,
			"err cause: Invalid HBM SEI event cause (%d) provided by FW\n", cause_idx);
8744 		return true;
8745 	}
8746 
8747 	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
8748 		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8749 		sei_data->hdr.is_critical ? "Critical" : "Non-critical",
8750 		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8751 		hbm_mc_sei_cause[cause_idx]);
8752 
8753 	/* Print error-specific info */
8754 	switch (cause_idx) {
8755 	case HBM_SEI_CATTRIP:
8756 		require_hard_reset = true;
8757 		break;
8758 
	case HBM_SEI_CMD_PARITY_EVEN:
8760 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
8761 						le32_to_cpu(sei_data->hdr.cnt));
8762 		require_hard_reset = true;
8763 		break;
8764 
	case HBM_SEI_CMD_PARITY_ODD:
8766 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
8767 						le32_to_cpu(sei_data->hdr.cnt));
8768 		require_hard_reset = true;
8769 		break;
8770 
8771 	case HBM_SEI_WRITE_DATA_PARITY_ERR:
8772 		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
8773 						le32_to_cpu(sei_data->hdr.cnt));
8774 		require_hard_reset = true;
8775 		break;
8776 
8777 	case HBM_SEI_READ_ERR:
8778 		/* Unlike other SEI events, read error requires further processing of the
8779 		 * raw data in order to determine the root cause.
8780 		 */
8781 		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
8782 								&sei_data->read_err_info,
8783 								le32_to_cpu(sei_data->hdr.cnt));
8784 		break;
8785 
8786 	default:
8787 		break;
8788 	}
8789 
8790 	require_hard_reset |= !!sei_data->hdr.is_critical;
8791 
8792 	return require_hard_reset;
8793 }
8794 
8795 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
8796 				u64 intr_cause_data)
8797 {
8798 	if (intr_cause_data) {
8799 		gaudi2_print_event(hdev, event_type, true,
8800 			"temperature error cause: %#llx", intr_cause_data);
8801 		return 1;
8802 	}
8803 
8804 	return 0;
8805 }
8806 
8807 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
8808 {
8809 	u32 i, error_count = 0;
8810 
8811 	for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
8812 		if (intr_cause_data & hbm_mc_spi[i].mask) {
8813 			dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
8814 				hbm_mc_spi[i].cause);
8815 			error_count++;
8816 		}
8817 
8818 	return error_count;
8819 }
8820 
8821 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8822 {
8823 	ktime_t zero_time = ktime_set(0, 0);
8824 
8825 	mutex_lock(&hdev->clk_throttling.lock);
8826 
8827 	switch (event_type) {
8828 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
8829 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
8830 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
8831 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
8832 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
8833 		dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
8834 		break;
8835 
8836 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
8837 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
8838 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_dbg_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
8840 		break;
8841 
8842 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
8843 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
8844 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
8845 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
8846 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
8847 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8848 		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
8849 		break;
8850 
8851 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
8852 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
8853 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
8854 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
8856 		break;
8857 
8858 	default:
8859 		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
8860 		break;
8861 	}
8862 
8863 	mutex_unlock(&hdev->clk_throttling.lock);
8864 }
8865 
8866 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
8867 					struct cpucp_pkt_sync_err *sync_err)
8868 {
8869 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8870 
8871 	gaudi2_print_event(hdev, event_type, false,
8872 		"FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
8873 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
8874 		q->pi, atomic_read(&q->ci));
8875 }
8876 
8877 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
8878 {
8879 	u32 p2p_intr, msix_gw_intr, error_count = 0;
8880 
8881 	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
8882 	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
8883 
8884 	if (p2p_intr) {
8885 		gaudi2_print_event(hdev, event_type, true,
8886 			"pcie p2p transaction terminated due to security, req_id(0x%x)\n",
8887 			RREG32(mmPCIE_WRAP_P2P_REQ_ID));
8888 
8889 		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
8890 		error_count++;
8891 	}
8892 
8893 	if (msix_gw_intr) {
8894 		gaudi2_print_event(hdev, event_type, true,
8895 			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n",
8896 			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
8897 
8898 		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
8899 		error_count++;
8900 	}
8901 
8902 	return error_count;
8903 }
8904 
8905 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
8906 			struct hl_eq_pcie_drain_ind_data *drain_data)
8907 {
8908 	u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
8909 
8910 	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
8911 	lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
8912 	lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
8913 	hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
8914 	hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
8915 
8916 	if (cause & BIT_ULL(0)) {
8917 		dev_err_ratelimited(hdev->dev,
8918 			"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
8919 			!!lbw_rd, !!lbw_wr);
8920 		error_count++;
8921 	}
8922 
8923 	if (cause & BIT_ULL(1)) {
8924 		dev_err_ratelimited(hdev->dev,
8925 			"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
8926 			hbw_rd, hbw_wr);
8927 		error_count++;
8928 	}
8929 
8930 	return error_count;
8931 }
8932 
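/*
 * Report every asserted PSOC AXI drain cause bit, check for global errors
 * and return the number of causes handled.
 */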
8933 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
8934 {
8935 	u32 error_count = 0;
8936 	int i;
8937 
8938 	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
8939 		if (intr_cause_data & BIT_ULL(i)) {
8940 			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
8941 				gaudi2_psoc_axi_drain_interrupts_cause[i]);
8942 			error_count++;
8943 		}
8944 	}
8945 
8946 	hl_check_for_glbl_errors(hdev);
8947 
8948 	return error_count;
8949 }
8950 
8951 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
8952 					struct cpucp_pkt_sync_err *sync_err)
8953 {
8954 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8955 
8956 	gaudi2_print_event(hdev, event_type, false,
8957 		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
8958 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
8959 }
8960 
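/*
 * Handle an engine ARC interrupt. Only the DCCM queue-full interrupt carries
 * a decoded payload (the queue index); any other interrupt type is reported
 * as unknown. Returns the number of causes handled.
 */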
8961 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
8962 					struct hl_eq_engine_arc_intr_data *data)
8963 {
8964 	struct hl_engine_arc_dccm_queue_full_irq *q;
8965 	u32 intr_type, engine_id;
8966 	u64 payload;
8967 
8968 	intr_type = le32_to_cpu(data->intr_type);
8969 	engine_id = le32_to_cpu(data->engine_id);
8970 	payload = le64_to_cpu(data->payload);
8971 
8972 	switch (intr_type) {
8973 	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
8974 		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
8975 
8976 		gaudi2_print_event(hdev, event_type, true,
8977 				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n",
8978 				engine_id, intr_type, q->queue_index);
8979 		return 1;
8980 	default:
8981 		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type\n");
8982 		return 0;
8983 	}
8984 }
8985 
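/*
 * Main event-queue handler: decode the event type from the EQ entry header,
 * update the event statistics, dispatch to the matching handler and then,
 * based on the irq map table and the handler results, unmask the interrupt,
 * notify user-space and/or schedule a device reset.
 */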
8986 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
8987 {
8988 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
8989 	bool reset_required = false, is_critical = false;
8990 	u32 index, ctl, reset_flags = HL_DRV_RESET_HARD, error_count = 0;
8991 	u64 event_mask = 0;
8992 	u16 event_type;
8993 
8994 	ctl = le32_to_cpu(eq_entry->hdr.ctl);
8995 	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
8996 
8997 	if (event_type >= GAUDI2_EVENT_SIZE) {
8998 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
8999 				event_type, GAUDI2_EVENT_SIZE - 1);
9000 		return;
9001 	}
9002 
9003 	gaudi2->events_stat[event_type]++;
9004 	gaudi2->events_stat_aggregate[event_type]++;
9005 
9006 	switch (event_type) {
9007 	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
9008 		fallthrough;
9009 	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
9010 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9011 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9012 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
9013 		is_critical = eq_entry->ecc_data.is_critical;
9014 		error_count++;
9015 		break;
9016 
9017 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
9018 		fallthrough;
9019 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9020 		fallthrough;
9021 	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9022 		error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
9023 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9024 		break;
9025 
9026 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
9027 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9028 		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
9029 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9030 		break;
9031 
9032 	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
9033 		error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
9034 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9035 		break;
9036 
9037 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9038 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9039 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9040 		error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
9041 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9042 		break;
9043 
9044 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9045 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9046 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
9047 		error_count = gaudi2_handle_rot_err(hdev, index, event_type,
9048 					&eq_entry->razwi_with_intr_cause, &event_mask);
9049 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9050 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9051 		break;
9052 
9053 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9054 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9055 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9056 						&eq_entry->razwi_with_intr_cause, &event_mask);
9057 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9058 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9059 		break;
9060 
9061 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9062 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9063 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9064 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9065 		break;
9066 
9067 	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
9068 	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
9069 	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
9070 	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
9071 	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
9072 	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
9073 	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
9074 	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
9075 	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
9076 	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
9077 	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
9078 	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
9079 	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
9080 	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
9081 	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
9082 	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
9083 	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
9084 	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
9085 	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
9086 	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
9087 	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
9088 	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
9089 	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
9090 	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
9091 	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
9092 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
9093 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
9094 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9095 					&eq_entry->razwi_with_intr_cause, &event_mask);
9096 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9097 		break;
9098 
9099 	case GAUDI2_EVENT_DEC0_SPI:
9100 	case GAUDI2_EVENT_DEC1_SPI:
9101 	case GAUDI2_EVENT_DEC2_SPI:
9102 	case GAUDI2_EVENT_DEC3_SPI:
9103 	case GAUDI2_EVENT_DEC4_SPI:
9104 	case GAUDI2_EVENT_DEC5_SPI:
9105 	case GAUDI2_EVENT_DEC6_SPI:
9106 	case GAUDI2_EVENT_DEC7_SPI:
9107 	case GAUDI2_EVENT_DEC8_SPI:
9108 	case GAUDI2_EVENT_DEC9_SPI:
9109 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
9110 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
9111 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9112 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9113 		break;
9114 
9115 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9116 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9117 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9118 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9119 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
9120 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
9121 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9122 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9123 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9124 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9125 		break;
9126 
9127 	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9128 	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9129 	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9130 	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9131 		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9132 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9133 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9134 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9135 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9136 		break;
9137 
9138 	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9139 	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9140 	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9141 	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9142 		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9143 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9144 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9145 		error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9146 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9147 		break;
9148 
9149 	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9150 	case GAUDI2_EVENT_KDMA0_CORE:
9151 		error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9152 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9153 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9154 		break;
9155 
9156 	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9157 		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9158 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9159 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9160 		break;
9161 
9162 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9163 		error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9164 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9165 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9166 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9167 		break;
9168 
9169 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9170 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9171 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9172 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9173 		error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9174 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9175 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9176 		break;
9177 
9178 	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9179 		error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9180 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9181 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9182 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9183 		break;
9184 
9185 	case GAUDI2_EVENT_PMMU_FATAL_0:
9186 		error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9187 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9188 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9189 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9190 		break;
9191 
9192 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9193 		error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9194 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9195 		break;
9196 
9197 	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9198 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9199 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9200 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9201 			reset_required = true;
9202 		}
9203 		error_count++;
9204 		break;
9205 
9206 	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9207 		error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9208 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9209 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9210 		break;
9211 
9212 	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9213 		error_count = gaudi2_handle_hbm_mc_spi(hdev,
9214 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9215 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9216 		break;
9217 
9218 	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
9219 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
9220 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9221 		break;
9222 
9223 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
9224 		error_count = gaudi2_handle_psoc_drain(hdev,
9225 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9226 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9227 		break;
9228 
9229 	case GAUDI2_EVENT_CPU_AXI_ECC:
9230 		error_count = GAUDI2_NA_EVENT_CAUSE;
9231 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9232 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9233 		break;
9234 	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
9235 		error_count = GAUDI2_NA_EVENT_CAUSE;
9236 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9237 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9238 		break;
9239 	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9240 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9241 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9242 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
9243 		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
9244 						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9245 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9246 		break;
9247 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9248 		error_count = GAUDI2_NA_EVENT_CAUSE;
9249 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9250 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9251 		break;
9252 	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9253 		error_count = GAUDI2_NA_EVENT_CAUSE;
9254 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9255 		break;
9256 	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9257 		error_count = GAUDI2_NA_EVENT_CAUSE;
9258 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9259 		break;
9260 	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9261 		error_count = GAUDI2_NA_EVENT_CAUSE;
9262 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9263 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9264 		break;
9265 	case GAUDI2_EVENT_PCIE_FATAL_ERR:
9266 		error_count = GAUDI2_NA_EVENT_CAUSE;
9267 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9268 		break;
9269 	case GAUDI2_EVENT_TPC0_BMON_SPMU:
9270 	case GAUDI2_EVENT_TPC1_BMON_SPMU:
9271 	case GAUDI2_EVENT_TPC2_BMON_SPMU:
9272 	case GAUDI2_EVENT_TPC3_BMON_SPMU:
9273 	case GAUDI2_EVENT_TPC4_BMON_SPMU:
9274 	case GAUDI2_EVENT_TPC5_BMON_SPMU:
9275 	case GAUDI2_EVENT_TPC6_BMON_SPMU:
9276 	case GAUDI2_EVENT_TPC7_BMON_SPMU:
9277 	case GAUDI2_EVENT_TPC8_BMON_SPMU:
9278 	case GAUDI2_EVENT_TPC9_BMON_SPMU:
9279 	case GAUDI2_EVENT_TPC10_BMON_SPMU:
9280 	case GAUDI2_EVENT_TPC11_BMON_SPMU:
9281 	case GAUDI2_EVENT_TPC12_BMON_SPMU:
9282 	case GAUDI2_EVENT_TPC13_BMON_SPMU:
9283 	case GAUDI2_EVENT_TPC14_BMON_SPMU:
9284 	case GAUDI2_EVENT_TPC15_BMON_SPMU:
9285 	case GAUDI2_EVENT_TPC16_BMON_SPMU:
9286 	case GAUDI2_EVENT_TPC17_BMON_SPMU:
9287 	case GAUDI2_EVENT_TPC18_BMON_SPMU:
9288 	case GAUDI2_EVENT_TPC19_BMON_SPMU:
9289 	case GAUDI2_EVENT_TPC20_BMON_SPMU:
9290 	case GAUDI2_EVENT_TPC21_BMON_SPMU:
9291 	case GAUDI2_EVENT_TPC22_BMON_SPMU:
9292 	case GAUDI2_EVENT_TPC23_BMON_SPMU:
9293 	case GAUDI2_EVENT_TPC24_BMON_SPMU:
9294 	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9295 	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9296 	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9297 	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9298 	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9299 	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9300 	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9301 	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9302 	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9303 	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9304 	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9305 	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9306 	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9307 		fallthrough;
9308 	case GAUDI2_EVENT_DEC0_BMON_SPMU:
9309 	case GAUDI2_EVENT_DEC1_BMON_SPMU:
9310 	case GAUDI2_EVENT_DEC2_BMON_SPMU:
9311 	case GAUDI2_EVENT_DEC3_BMON_SPMU:
9312 	case GAUDI2_EVENT_DEC4_BMON_SPMU:
9313 	case GAUDI2_EVENT_DEC5_BMON_SPMU:
9314 	case GAUDI2_EVENT_DEC6_BMON_SPMU:
9315 	case GAUDI2_EVENT_DEC7_BMON_SPMU:
9316 	case GAUDI2_EVENT_DEC8_BMON_SPMU:
9317 	case GAUDI2_EVENT_DEC9_BMON_SPMU:
9318 	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9319 		error_count = GAUDI2_NA_EVENT_CAUSE;
9320 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9321 		break;
9322 
9323 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9324 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9325 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9326 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9327 		gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
9328 		error_count = GAUDI2_NA_EVENT_CAUSE;
9329 		break;
9330 
9331 	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9332 		gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
9333 		error_count = GAUDI2_NA_EVENT_CAUSE;
9334 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9335 		break;
9336 
9337 	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9338 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9339 		error_count = GAUDI2_NA_EVENT_CAUSE;
9340 		/* Do nothing - FW will handle it */
9341 		break;
9342 
9343 	case GAUDI2_EVENT_PCIE_P2P_MSIX:
9344 		error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
9345 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9346 		break;
9347 
9348 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9349 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9350 		error_count = gaudi2_handle_sm_err(hdev, event_type, index);
9351 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9352 		break;
9353 
9354 	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9355 		error_count = GAUDI2_NA_EVENT_CAUSE;
9356 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9357 		break;
9358 
9359 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9360 		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9361 						le64_to_cpu(eq_entry->data[0]));
9362 		error_count = GAUDI2_NA_EVENT_CAUSE;
9363 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9364 		break;
9365 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9366 		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9367 						le64_to_cpu(eq_entry->data[0]));
9368 		error_count = GAUDI2_NA_EVENT_CAUSE;
9369 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9370 		break;
9371 
9372 	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9373 		gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
9374 		error_count = GAUDI2_NA_EVENT_CAUSE;
9375 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9376 		break;
9377 
9378 	case GAUDI2_EVENT_ARC_DCCM_FULL:
9379 		error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
9380 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9381 		break;
9382 
9383 	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9384 	case GAUDI2_EVENT_DEV_RESET_REQ:
9385 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9386 		error_count = GAUDI2_NA_EVENT_CAUSE;
9387 		is_critical = true;
9388 		break;
9389 
9390 	default:
9391 		if (gaudi2_irq_map_table[event_type].valid) {
9392 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
9393 						event_type);
9394 			error_count = GAUDI2_NA_EVENT_CAUSE;
9395 		}
9396 	}
9397 
9398 	/* Make sure to print an error in case no error cause was printed so far.
9399 	 * Note that although we have counted the errors, here the count is used
9400 	 * only as a boolean.
9401 	 */
9402 	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
9403 		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
9404 	else if (error_count == 0)
9405 		gaudi2_print_event(hdev, event_type, true,
9406 				"No error cause for H/W event %u\n", event_type);
9407 
9408 	if ((gaudi2_irq_map_table[event_type].reset || reset_required) &&
9409 				(hdev->hard_reset_on_fw_events ||
9410 				(hdev->asic_prop.fw_security_enabled && is_critical)))
9411 		goto reset_device;
9412 
9413 	/* Send unmask irq only for interrupts not classified as MSG */
9414 	if (!gaudi2_irq_map_table[event_type].msg)
9415 		hl_fw_unmask_irq(hdev, event_type);
9416 
9417 	if (event_mask)
9418 		hl_notifier_event_send_all(hdev, event_mask);
9419 
9420 	return;
9421 
9422 reset_device:
9423 	if (hdev->asic_prop.fw_security_enabled && is_critical) {
9424 		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
9425 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
9426 	} else {
9427 		reset_flags |= HL_DRV_RESET_DELAY;
9428 	}
9429 	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
9430 	hl_device_cond_reset(hdev, reset_flags, event_mask);
9431 }
9432 
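/*
 * Build a single LIN_DMA packet in memset mode (with write-completion and
 * engine-barrier enabled) and submit it to the given EDMA queue without
 * waiting for completion; the caller tracks completion through the SOB that
 * the EDMA write-completion was configured to increment.
 */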
9433 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
9434 			struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
9435 			u32 hw_queue_id, u32 size, u64 addr, u32 val)
9436 {
9437 	u32 ctl, pkt_size;
9438 	int rc = 0;
9439 
9440 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
9441 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
9442 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
9443 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
9444 
9445 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
9446 	lin_dma_pkt->src_addr = cpu_to_le64(val);
9447 	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
9448 	lin_dma_pkt->tsize = cpu_to_le32(size);
9449 
9450 	pkt_size = sizeof(struct packet_lin_dma);
9451 
9452 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
9453 	if (rc)
9454 		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
9455 				hw_queue_id);
9456 
9457 	return rc;
9458 }
9459 
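/*
 * Fill a device memory range with a given value using the enabled EDMA
 * engines: bypass the MMU on the EDMA cores, point their write-completion at
 * a driver SOB, submit up-to-2GB LIN_DMA memset chunks round-robin across
 * the enabled engines and poll the SOB until all submitted packets complete.
 * The original EDMA configuration is restored before returning.
 */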
9460 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
9461 {
9462 	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
9463 					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
9464 					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
9465 					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
9466 	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
9467 		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
9468 	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
9469 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9470 	void *lin_dma_pkts_arr;
9471 	dma_addr_t pkt_dma_addr;
9472 	int rc = 0, dma_num = 0;
9473 
9474 	if (prop->edma_enabled_mask == 0) {
9475 		dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n");
9476 		return -EIO;
9477 	}
9478 
9479 	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9480 	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
9481 	comp_addr = CFG_BASE + sob_addr;
9482 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
9483 		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
9484 	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
9485 		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
9486 
9487 	/* Calculate how many lin dma pkts we'll need */
9488 	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
9489 	pkt_size = sizeof(struct packet_lin_dma);
9490 
9491 	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
9492 					&pkt_dma_addr, GFP_KERNEL);
9493 	if (!lin_dma_pkts_arr)
9494 		return -ENOMEM;
9495 
9496 	/*
9497 	 * Set MMU bypass for the scrubbing - all EDMAs are configured the same,
9498 	 * so save only the first one to restore later.
9499 	 * Also set the SOB address on all EDMA cores for completion.
9500 	 * Set the QM as trusted to allow it to access physical addresses with MMU bypass.
9501 	 */
9502 	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
9503 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9504 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9505 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9506 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9507 
9508 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9509 				continue;
9510 
9511 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
9512 					edma_offset, mmubp);
9513 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
9514 					lower_32_bits(comp_addr));
9515 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
9516 					upper_32_bits(comp_addr));
9517 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
9518 					comp_val);
9519 			gaudi2_qman_set_test_mode(hdev,
9520 					edma_queues_id[dcore] + 4 * edma_idx, true);
9521 		}
9522 	}
9523 
9524 	WREG32(sob_addr, 0);
9525 
9526 	while (cur_addr < end_addr) {
9527 		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9528 			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9529 				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9530 
9531 				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9532 					continue;
9533 
9534 				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
9535 
9536 				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
9537 					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
9538 					pkt_dma_addr + dma_num * pkt_size,
9539 					edma_queues_id[dcore] + edma_idx * 4,
9540 					chunk_size, cur_addr, val);
9541 				if (rc)
9542 					goto end;
9543 
9544 				dma_num++;
9545 				cur_addr += chunk_size;
9546 				if (cur_addr == end_addr)
9547 					break;
9548 			}
9549 		}
9550 	}
9551 
9552 	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
9553 	if (rc) {
9554 		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
9555 		goto end;
9556 	}
9557 end:
9558 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9559 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9560 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9561 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9562 
9563 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9564 				continue;
9565 
9566 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
9567 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
9568 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
9569 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
9570 			gaudi2_qman_set_test_mode(hdev,
9571 					edma_queues_id[dcore] + 4 * edma_idx, false);
9572 		}
9573 	}
9574 
9575 	WREG32(sob_addr, 0);
9576 	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
9577 
9578 	return rc;
9579 }
9580 
9581 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
9582 {
9583 	int rc;
9584 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9585 	u64 size = prop->dram_end_address - prop->dram_user_base_address;
9586 
9587 	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
9588 
9589 	if (rc)
9590 		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
9591 				prop->dram_user_base_address, size);
9592 	return rc;
9593 }
9594 
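/*
 * Scrub SRAM and DRAM with the configured scrub value. Does nothing when
 * memory scrubbing is disabled; on PLDM only a 64KB SRAM region is scrubbed.
 */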
9595 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
9596 {
9597 	int rc;
9598 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9599 	u64 val = hdev->memory_scrub_val;
9600 	u64 addr, size;
9601 
9602 	if (!hdev->memory_scrub)
9603 		return 0;
9604 
9605 	/* scrub SRAM */
9606 	addr = prop->sram_user_base_address;
9607 	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
9608 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
9609 			addr, addr + size, val);
9610 	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
9611 	if (rc) {
9612 		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
9613 		return rc;
9614 	}
9615 
9616 	/* scrub DRAM */
9617 	rc = gaudi2_scrub_device_dram(hdev, val);
9618 	if (rc) {
9619 		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
9620 		return rc;
9621 	}
9622 	return 0;
9623 }
9624 
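/*
 * Clear the user-available sync manager resources on all dcores: CQ
 * configuration registers, monitors (status set to the protection bit,
 * config cleared) and sync objects. Dcore0 is handled separately because
 * only the registers above the first-available user index are cleared there.
 */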
9625 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
9626 {
9627 	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
9628 		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
9629 	u32 val, size, offset;
9630 	int dcore_id;
9631 
9632 	offset = hdev->asic_prop.first_available_cq[0] * 4;
9633 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
9634 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
9635 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
9636 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
9637 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
9638 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
9639 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
9640 			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
9641 
9642 	/* memset dcore0 CQ registers */
9643 	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9644 	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9645 	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9646 	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9647 	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9648 	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9649 
9650 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
9651 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
9652 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
9653 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
9654 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
9655 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
9656 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
9657 
9658 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9659 		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9660 		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9661 		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9662 		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9663 		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9664 		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9665 
9666 		cq_lbw_l_addr += DCORE_OFFSET;
9667 		cq_lbw_h_addr += DCORE_OFFSET;
9668 		cq_lbw_data_addr += DCORE_OFFSET;
9669 		cq_base_l_addr += DCORE_OFFSET;
9670 		cq_base_h_addr += DCORE_OFFSET;
9671 		cq_size_addr += DCORE_OFFSET;
9672 	}
9673 
9674 	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
9675 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
9676 	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
9677 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
9678 
9679 	/* memset dcore0 monitors */
9680 	gaudi2_memset_device_lbw(hdev, addr, size, val);
9681 
9682 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
9683 	gaudi2_memset_device_lbw(hdev, addr, size, 0);
9684 
9685 	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
9686 	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
9687 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
9688 
9689 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9690 		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
9691 		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
9692 		mon_sts_addr += DCORE_OFFSET;
9693 		mon_cfg_addr += DCORE_OFFSET;
9694 	}
9695 
9696 	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9697 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
9698 	val = 0;
9699 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
9700 			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9701 
9702 	/* memset dcore0 sobs */
9703 	gaudi2_memset_device_lbw(hdev, addr, size, val);
9704 
9705 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
9706 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
9707 
9708 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9709 		gaudi2_memset_device_lbw(hdev, addr, size, val);
9710 		addr += DCORE_OFFSET;
9711 	}
9712 
9713 	/* Flush all WREG to prevent race */
9714 	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9715 }
9716 
9717 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
9718 {
9719 	u32 reg_base, hw_queue_id;
9720 
9721 	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
9722 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9723 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9724 			continue;
9725 
9726 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9727 
9728 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9729 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9730 	}
9731 
9732 	/* Flush all WREG to prevent race */
9733 	RREG32(mmPDMA0_QM_ARB_CFG_0);
9734 }
9735 
9736 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
9737 {
9738 	u32 reg_base, hw_queue_id;
9739 
9740 	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
9741 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9742 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9743 			continue;
9744 
9745 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9746 
9747 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9748 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9749 	}
9750 
9751 	/* Flush all WREG to prevent race */
9752 	RREG32(mmPDMA0_QM_ARB_CFG_0);
9753 }
9754 
9755 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
9756 {
9757 	return 0;
9758 }
9759 
9760 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
9761 {
9762 }
9763 
9764 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
9765 						struct dup_block_ctx *cfg_ctx)
9766 {
9767 	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
9768 	u8 seq;
9769 	int i;
9770 
9771 	for (i = 0 ; i < cfg_ctx->instances ; i++) {
9772 		seq = block_idx * cfg_ctx->instances + i;
9773 
9774 		/* skip disabled instance */
9775 		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
9776 			continue;
9777 
9778 		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
9779 					cfg_ctx->data);
9780 	}
9781 }
9782 
9783 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
9784 						u64 mask)
9785 {
9786 	int i;
9787 
9788 	cfg_ctx->enabled_mask = mask;
9789 
9790 	for (i = 0 ; i < cfg_ctx->blocks ; i++)
9791 		gaudi2_init_block_instances(hdev, i, cfg_ctx);
9792 }
9793 
9794 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
9795 {
9796 	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
9797 }
9798 
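/*
 * Read device memory into a host buffer for debugfs using the KDMA engine:
 * allocate a 2MB host bounce buffer, map it into the compute context's MMU
 * and copy the requested range in up-to-2MB chunks through KDMA, then unmap
 * and free the intermediate resources.
 */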
9799 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
9800 {
9801 	void *host_mem_virtual_addr;
9802 	dma_addr_t host_mem_dma_addr;
9803 	u64 reserved_va_base;
9804 	u32 pos, size_left, size_to_dma;
9805 	struct hl_ctx *ctx;
9806 	int rc = 0;
9807 
9808 	/* Fetch the ctx */
9809 	ctx = hl_get_compute_ctx(hdev);
9810 	if (!ctx) {
9811 		dev_err(hdev->dev, "No ctx available\n");
9812 		return -EINVAL;
9813 	}
9814 
9815 	/* Allocate buffers for read and for poll */
9816 	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
9817 								GFP_KERNEL | __GFP_ZERO);
9818 	if (host_mem_virtual_addr == NULL) {
9819 		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
9820 		rc = -ENOMEM;
9821 		goto put_ctx;
9822 	}
9823 
9824 	/* Reserve VM region on asic side */
9825 	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
9826 						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9827 	if (!reserved_va_base) {
9828 		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
9829 		rc = -ENOMEM;
9830 		goto free_data_buffer;
9831 	}
9832 
9833 	/* Create mapping on asic side */
9834 	mutex_lock(&hdev->mmu_lock);
9835 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
9836 	hl_mmu_invalidate_cache_range(hdev, false,
9837 				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
9838 				      ctx->asid, reserved_va_base, SZ_2M);
9839 	mutex_unlock(&hdev->mmu_lock);
9840 	if (rc) {
9841 		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
9842 		goto unreserve_va;
9843 	}
9844 
9845 	/* Enable MMU on KDMA */
9846 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
9847 
9848 	pos = 0;
9849 	size_left = size;
9850 	size_to_dma = SZ_2M;
9851 
9852 	while (size_left > 0) {
9853 		if (size_left < SZ_2M)
9854 			size_to_dma = size_left;
9855 
9856 		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
9857 		if (rc)
9858 			break;
9859 
9860 		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
9861 
9862 		if (size_left <= SZ_2M)
9863 			break;
9864 
9865 		pos += SZ_2M;
9866 		addr += SZ_2M;
9867 		size_left -= SZ_2M;
9868 	}
9869 
9870 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
9871 
9872 	mutex_lock(&hdev->mmu_lock);
9873 	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
9874 	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
9875 				      ctx->asid, reserved_va_base, SZ_2M);
9876 	mutex_unlock(&hdev->mmu_lock);
9877 unreserve_va:
9878 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
9879 free_data_buffer:
9880 	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
9881 put_ctx:
9882 	hl_ctx_put(ctx);
9883 
9884 	return rc;
9885 }
9886 
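/*
 * Allocate the internal command-buffer pool (its allocation order is derived
 * from the signal/wait CB sizes), back it with a gen_pool allocator and map
 * it into the context's host VA range. Does nothing if the PMMU was not
 * initialized.
 */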
9887 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
9888 {
9889 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9890 	int min_alloc_order, rc;
9891 
9892 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9893 		return 0;
9894 
9895 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
9896 								HOST_SPACE_INTERNAL_CB_SZ,
9897 								&hdev->internal_cb_pool_dma_addr,
9898 								GFP_KERNEL | __GFP_ZERO);
9899 
9900 	if (!hdev->internal_cb_pool_virt_addr)
9901 		return -ENOMEM;
9902 
9903 	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
9904 					gaudi2_get_wait_cb_size(hdev)));
9905 
9906 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
9907 	if (!hdev->internal_cb_pool) {
9908 		dev_err(hdev->dev, "Failed to create internal CB pool\n");
9909 		rc = -ENOMEM;
9910 		goto free_internal_cb_pool;
9911 	}
9912 
9913 	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
9914 				HOST_SPACE_INTERNAL_CB_SZ, -1);
9915 	if (rc) {
9916 		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
9917 		rc = -EFAULT;
9918 		goto destroy_internal_cb_pool;
9919 	}
9920 
9921 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
9922 					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9923 
9924 	if (!hdev->internal_cb_va_base) {
9925 		rc = -ENOMEM;
9926 		goto destroy_internal_cb_pool;
9927 	}
9928 
9929 	mutex_lock(&hdev->mmu_lock);
9930 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
9931 					HOST_SPACE_INTERNAL_CB_SZ);
9932 	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
9933 	mutex_unlock(&hdev->mmu_lock);
9934 
9935 	if (rc)
9936 		goto unreserve_internal_cb_pool;
9937 
9938 	return 0;
9939 
9940 unreserve_internal_cb_pool:
9941 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9942 destroy_internal_cb_pool:
9943 	gen_pool_destroy(hdev->internal_cb_pool);
9944 free_internal_cb_pool:
9945 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9946 					hdev->internal_cb_pool_dma_addr);
9947 
9948 	return rc;
9949 }
9950 
9951 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
9952 {
9953 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9954 
9955 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9956 		return;
9957 
9958 	mutex_lock(&hdev->mmu_lock);
9959 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9960 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9961 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
9962 	mutex_unlock(&hdev->mmu_lock);
9963 
9964 	gen_pool_destroy(hdev->internal_cb_pool);
9965 
9966 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9967 					hdev->internal_cb_pool_dma_addr);
9968 }
9969 
9970 static void gaudi2_restore_user_registers(struct hl_device *hdev)
9971 {
9972 	gaudi2_restore_user_sm_registers(hdev);
9973 	gaudi2_restore_user_qm_registers(hdev);
9974 }
9975 
9976 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9977 {
9978 	struct hl_device *hdev = ctx->hdev;
9979 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9980 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9981 	int rc;
9982 
9983 	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9984 				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
9985 	if (rc)
9986 		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
9987 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9988 
9989 	return rc;
9990 }
9991 
9992 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9993 {
9994 	struct hl_device *hdev = ctx->hdev;
9995 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9996 	int rc;
9997 
9998 	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9999 				prop->pmmu.page_size, true);
10000 	if (rc)
10001 		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
10002 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10003 }
10004 
10005 static int gaudi2_ctx_init(struct hl_ctx *ctx)
10006 {
10007 	int rc;
10008 
10009 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
10010 	if (rc)
10011 		return rc;
10012 
10013 	/* No need to clear user registers if the device has just performed a
10014 	 * reset; we restore only the NIC QM registers.
10015 	 */
10016 	if (ctx->hdev->reset_upon_device_release)
10017 		gaudi2_restore_nic_qm_registers(ctx->hdev);
10018 	else
10019 		gaudi2_restore_user_registers(ctx->hdev);
10020 
10021 	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
10022 	if (rc)
10023 		return rc;
10024 
10025 	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
10026 	if (rc)
10027 		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10028 
10029 	return rc;
10030 }
10031 
10032 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
10033 {
10034 	if (ctx->asid == HL_KERNEL_ASID_ID)
10035 		return;
10036 
10037 	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10038 
10039 	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
10040 }
10041 
10042 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
10043 {
10044 	struct hl_device *hdev = cs->ctx->hdev;
10045 	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
10046 	u32 mon_payload, sob_id, mon_id;
10047 
10048 	if (!cs_needs_completion(cs))
10049 		return 0;
10050 
10051 	/*
10052 	 * The first 64 SOB/MON are reserved for the driver's QMAN auto-completion
10053 	 * mechanism. Each SOB/MON pair is used for a pending CS with the same
10054 	 * cyclic index. The SOB value is increased when each of the CS jobs is
10055 	 * completed. When the SOB reaches the number of CS jobs, the monitor
10056 	 * generates an MSI-X interrupt.
10057 	 */
10058 
10059 	sob_id = mon_id = index;
10060 	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
10061 				(1 << CQ_ENTRY_READY_SHIFT) | index;
10062 
10063 	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
10064 				cs->jobs_cnt);
10065 
10066 	return 0;
10067 }
10068 
10069 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
10070 {
10071 	return HL_INVALID_QUEUE;
10072 }
10073 
10074 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
10075 {
10076 	struct hl_cb *cb = data;
10077 	struct packet_msg_short *pkt;
10078 	u32 value, ctl, pkt_size = sizeof(*pkt);
10079 
10080 	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
10081 	memset(pkt, 0, pkt_size);
10082 
10083 	/* Inc by 1, Mode ADD */
10084 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
10085 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
10086 
10087 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
10088 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
10089 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10090 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
10091 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10092 
10093 	pkt->value = cpu_to_le32(value);
10094 	pkt->ctl = cpu_to_le32(ctl);
10095 
10096 	return size + pkt_size;
10097 }
10098 
10099 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
10100 {
10101 	u32 ctl, pkt_size = sizeof(*pkt);
10102 
10103 	memset(pkt, 0, pkt_size);
10104 
10105 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10106 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
10107 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10108 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10109 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
10110 
10111 	pkt->value = cpu_to_le32(value);
10112 	pkt->ctl = cpu_to_le32(ctl);
10113 
10114 	return pkt_size;
10115 }
10116 
10117 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
10118 					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
10119 {
10120 	u32 ctl, value, pkt_size = sizeof(*pkt);
10121 	u8 mask;
10122 
10123 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
10124 		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
10125 		return 0;
10126 	}
10127 
10128 	memset(pkt, 0, pkt_size);
10129 
10130 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
10131 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
10132 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
10133 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
10134 
10135 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10136 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10137 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10138 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10139 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10140 
10141 	pkt->value = cpu_to_le32(value);
10142 	pkt->ctl = cpu_to_le32(ctl);
10143 
10144 	return pkt_size;
10145 }
10146 
10147 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
10148 {
10149 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
10150 
10151 	memset(pkt, 0, pkt_size);
10152 
10153 	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
10154 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
10155 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
10156 
10157 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
10158 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10159 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10160 
10161 	pkt->cfg = cpu_to_le32(cfg);
10162 	pkt->ctl = cpu_to_le32(ctl);
10163 
10164 	return pkt_size;
10165 }
10166 
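/*
 * Build a wait CB: four MSG_SHORT packets configure the monitor (payload
 * address low/high, payload data and the ARM/sync-object binding), followed
 * by a FENCE packet that blocks the stream until the monitor payload reaches
 * the QM fence counter.
 */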
10167 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
10168 {
10169 	struct hl_cb *cb = prop->data;
10170 	void *buf = (void *) (uintptr_t) (cb->kernel_address);
10171 
10172 	u64 monitor_base, fence_addr = 0;
10173 	u32 stream_index, size = prop->size;
10174 	u16 msg_addr_offset;
10175 
10176 	stream_index = prop->q_idx % 4;
10177 	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
10178 			QM_FENCE2_OFFSET + stream_index * 4;
10179 
10180 	/*
10181 	 * monitor_base should be the content of the base0 address registers,
10182 	 * so it will be added to the msg short offsets
10183 	 */
10184 	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
10185 
10186 	/* First monitor config packet: low address of the sync */
10187 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
10188 				monitor_base;
10189 
10190 	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
10191 
10192 	/* Second monitor config packet: high address of the sync */
10193 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
10194 				monitor_base;
10195 
10196 	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
10197 
10198 	/*
10199 	 * Third monitor config packet: the payload, i.e. what to write when the
10200 	 * sync triggers
10201 	 */
10202 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
10203 				monitor_base;
10204 
10205 	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
10206 
10207 	/* Fourth monitor config packet: bind the monitor to a sync object */
10208 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
10209 
10210 	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
10211 						prop->sob_val, msg_addr_offset);
10212 
10213 	/* Fence packet */
10214 	size += gaudi2_add_fence_pkt(buf + size);
10215 
10216 	return size;
10217 }
10218 
10219 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
10220 {
10221 	struct hl_hw_sob *hw_sob = data;
10222 
10223 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
10224 
10225 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
10226 
10227 	kref_init(&hw_sob->kref);
10228 }
10229 
10230 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
10231 {
10232 }
10233 
10234 static u64 gaudi2_get_device_time(struct hl_device *hdev)
10235 {
10236 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
10237 
10238 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
10239 }
10240 
10241 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
10242 {
10243 	return 0;
10244 }
10245 
10246 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
10247 					struct hl_cs *cs, u32 wait_queue_id,
10248 					u32 collective_engine_id, u32 encaps_signal_offset)
10249 {
10250 	return -EINVAL;
10251 }
10252 
10253 /*
10254  * gaudi2_mmu_scramble_addr - converts a DRAM (non power of 2) page-size aligned
10255  *                            address to a DMMU page-size (64MB) aligned address
10256  *                            before mapping it in the MMU.
10257  * The operation is performed on both the virtual and physical addresses.
10258  * For a device with 6 HBMs the scramble is:
10259  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
10260  *
10261  * Example:
10262  * =============================================================================
10263  * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
10264  * Phys address                                                     in MMU last
10265  *                                                                    HOP
10266  * =============================================================================
10267  * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
10268  * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
10269  * =============================================================================
10270  */
10271 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
10272 {
10273 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10274 	u32 divisor, mod_va;
10275 	u64 div_va;
10276 
10277 	/* accept any address in the DRAM address space */
10278 	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
10279 									VA_HBM_SPACE_END)) {
10280 
10281 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10282 		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
10283 		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
10284 			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
10285 			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
10286 	}
10287 
10288 	return raw_addr;
10289 }
10290 
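/*
 * Inverse of gaudi2_mmu_scramble_addr(): convert a scrambled, DMMU page-size
 * aligned address back to the original DRAM address. Addresses outside the
 * DRAM address space are returned unchanged.
 */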
10291 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
10292 {
10293 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10294 	u32 divisor, mod_va;
10295 	u64 div_va;
10296 
10297 	/* accept any address in the DRAM address space */
10298 	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
10299 									VA_HBM_SPACE_END)) {
10300 
10301 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10302 		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
10303 					PAGE_SIZE_64MB, &mod_va);
10304 
10305 		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
10306 					(div_va * divisor + mod_va));
10307 	}
10308 
10309 	return scrambled_addr;
10310 }
10311 
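/*
 * Return the command base address of a decoder: cores 0-7 are the per-dcore
 * decoders, higher core ids map to the PCIe shared decoders. Returns 0 for
 * an invalid core id.
 */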
10312 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
10313 {
10314 	u32 base = 0, dcore_id, dec_id;
10315 
10316 	if (core_id >= NUMBER_OF_DEC) {
10317 		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
10318 		goto out;
10319 	}
10320 
10321 	if (core_id < 8) {
10322 		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
10323 		dec_id = core_id % NUM_OF_DEC_PER_DCORE;
10324 
10325 		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
10326 				dec_id * DCORE_VDEC_OFFSET;
10327 	} else {
10328 		/* PCIe Shared Decoder */
10329 		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
10330 	}
10331 out:
10332 	return base;
10333 }
10334 
10335 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
10336 				u32 *block_size, u32 *block_id)
10337 {
10338 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10339 	int i;
10340 
10341 	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
10342 		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
10343 			*block_id = i;
10344 			if (block_size)
10345 				*block_size = gaudi2->mapped_blocks[i].size;
10346 			return 0;
10347 		}
10348 	}
10349 
10350 	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
10351 
10352 	return -EINVAL;
10353 }
10354 
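/*
 * mmap a user-mappable configuration block to user-space. Only a mapping of
 * the entire block is allowed; the physical address is computed from the
 * SRAM/CFG BAR start plus the block's offset.
 */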
10355 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
10356 			u32 block_id, u32 block_size)
10357 {
10358 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10359 	u64 offset_in_bar;
10360 	u64 address;
10361 	int rc;
10362 
10363 	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
10364 		dev_err(hdev->dev, "Invalid block id %u", block_id);
10365 		return -EINVAL;
10366 	}
10367 
10368 	/* we allow mapping only an entire block */
10369 	if (block_size != gaudi2->mapped_blocks[block_id].size) {
10370 		dev_err(hdev->dev, "Invalid block size %u", block_size);
10371 		return -EINVAL;
10372 	}
10373 
10374 	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
10375 
10376 	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
10377 
10378 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
10379 			VM_DONTCOPY | VM_NORESERVE);
10380 
10381 	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
10382 			block_size, vma->vm_page_prot);
10383 	if (rc)
10384 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
10385 
10386 	return rc;
10387 }
10388 
10389 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
10390 {
10391 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10392 
10393 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
10394 	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
10395 
10396 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
10397 		WREG32(irq_handler_offset,
10398 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
10399 }
10400 
10401 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
10402 {
10403 	switch (mmu_id) {
10404 	case HW_CAP_DCORE0_DMMU0:
10405 		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
10406 		break;
10407 	case HW_CAP_DCORE0_DMMU1:
10408 		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
10409 		break;
10410 	case HW_CAP_DCORE0_DMMU2:
10411 		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
10412 		break;
10413 	case HW_CAP_DCORE0_DMMU3:
10414 		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
10415 		break;
10416 	case HW_CAP_DCORE1_DMMU0:
10417 		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
10418 		break;
10419 	case HW_CAP_DCORE1_DMMU1:
10420 		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
10421 		break;
10422 	case HW_CAP_DCORE1_DMMU2:
10423 		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
10424 		break;
10425 	case HW_CAP_DCORE1_DMMU3:
10426 		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
10427 		break;
10428 	case HW_CAP_DCORE2_DMMU0:
10429 		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
10430 		break;
10431 	case HW_CAP_DCORE2_DMMU1:
10432 		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
10433 		break;
10434 	case HW_CAP_DCORE2_DMMU2:
10435 		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
10436 		break;
10437 	case HW_CAP_DCORE2_DMMU3:
10438 		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
10439 		break;
10440 	case HW_CAP_DCORE3_DMMU0:
10441 		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
10442 		break;
10443 	case HW_CAP_DCORE3_DMMU1:
10444 		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
10445 		break;
10446 	case HW_CAP_DCORE3_DMMU2:
10447 		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
10448 		break;
10449 	case HW_CAP_DCORE3_DMMU3:
10450 		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
10451 		break;
10452 	case HW_CAP_PMMU:
10453 		*mmu_base = mmPMMU_HBW_MMU_BASE;
10454 		break;
10455 	default:
10456 		return -EINVAL;
10457 	}
10458 
10459 	return 0;
10460 }
10461 
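/*
 * Check a single (H)MMU for pending page-fault and access-error indications
 * and handle them. MMUs that were never initialized are skipped.
 */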
10462 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
10463 {
10464 	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
10465 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10466 	u32 mmu_base;
10467 
10468 	if (!(gaudi2->hw_cap_initialized & mmu_id))
10469 		return;
10470 
10471 	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
10472 		return;
10473 
10474 	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
10475 	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
10476 }
10477 
10478 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
10479 {
10480 	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
10481 
10482 	/* check all HMMUs */
10483 	for (i = 0 ; i < num_of_hmmus ; i++) {
10484 		mmu_id = HW_CAP_DCORE0_DMMU0 << i;
10485 
10486 		if (mmu_cap_mask & mmu_id)
10487 			gaudi2_ack_mmu_error(hdev, mmu_id);
10488 	}
10489 
10490 	/* check PMMU */
10491 	if (mmu_cap_mask & HW_CAP_PMMU)
10492 		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
10493 
10494 	return 0;
10495 }
10496 
10497 static void gaudi2_get_msi_info(__le32 *table)
10498 {
10499 	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
10500 }
10501 
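/* Translate a PLL index as exposed to user space into the firmware PLL index */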
10502 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
10503 {
10504 	switch (pll_idx) {
10505 	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
10506 	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
10507 	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
10508 	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
10509 	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
10510 	case HL_GAUDI2_MME_PLL: return MME_PLL;
10511 	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
10512 	case HL_GAUDI2_IF_PLL: return IF_PLL;
10513 	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
10514 	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
10515 	case HL_GAUDI2_VID_PLL: return VID_PLL;
10516 	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
10517 	default: return -EINVAL;
10518 	}
10519 }
10520 
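/*
 * State-dump support is not implemented yet. The helpers below are stubs that
 * are still registered with the common state-dump code.
 */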
10521 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
10522 {
10523 	/* Not implemented */
10524 	return 0;
10525 }
10526 
10527 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
10528 {
10529 	/* Not implemented */
10530 	return 0;
10531 }
10532 
10533 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
10534 				struct hl_device *hdev, struct hl_mon_state_dump *mon)
10535 {
10536 	/* Not implemented */
10537 	return 0;
10538 }
10539 
10541 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
10542 				u64 status_base_offset, enum hl_sync_engine_type engine_type,
10543 				u32 engine_id, char **buf, size_t *size, size_t *offset)
10544 {
10545 	/* Not implemented */
10546 	return 0;
10547 }
10548 
10550 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
10551 	.monitor_valid = gaudi2_monitor_valid,
10552 	.print_single_monitor = gaudi2_print_single_monitor,
10553 	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
10554 	.print_fences_single_engine = gaudi2_print_fences_single_engine,
10555 };
10556 
10557 static void gaudi2_state_dump_init(struct hl_device *hdev)
10558 {
10559 	/* Not implemented */
10560 	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
10561 	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
10562 }
10563 
10564 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
10565 {
10566 	return 0;
10567 }
10568 
10569 static u32 *gaudi2_get_stream_master_qid_arr(void)
10570 {
10571 	return NULL;
10572 }
10573 
10574 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
10575 				struct attribute_group *dev_vrm_attr_grp)
10576 {
10577 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
10578 	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
10579 }
10580 
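/*
 * Compute the actual page size the MMU code should use for a mapping request,
 * based on the requested page size and whether the address resides in DRAM.
 */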
10581 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
10582 					u32 page_size, u32 *real_page_size, bool is_dram_addr)
10583 {
10584 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10585 
	/* for host pages the page size must be a multiple of the MMU page size */
10587 	if (!is_dram_addr) {
10588 		if (page_size % mmu_prop->page_size)
10589 			goto page_size_err;
10590 
10591 		*real_page_size = mmu_prop->page_size;
10592 		return 0;
10593 	}
10594 
10595 	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
10596 		goto page_size_err;
10597 
	/*
	 * The MMU page size differs from the DRAM page size (more precisely, the DMMU
	 * page size is greater than the DRAM page size). For this reason, work with the
	 * DRAM page size and let the MMU scrambling routine handle the mismatch when
	 * calculating the address to place in the MMU page table (in that case, also
	 * make sure that the dram_page_size is not greater than the MMU page size).
	 */
10606 	*real_page_size = prop->dram_page_size;
10607 
10608 	return 0;
10609 
10610 page_size_err:
10611 	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
10612 							page_size, mmu_prop->page_size >> 10);
10613 	return -EFAULT;
10614 }
10615 
10616 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
10617 {
10618 	return -EOPNOTSUPP;
10619 }
10620 
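/* Report device activity (open/close) to the firmware, once the CPU queue is up */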
10621 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
10622 {
10623 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10624 
10625 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
10626 		return 0;
10627 
10628 	return hl_fw_send_device_activity(hdev, open);
10629 }
10630 
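/*
 * ASIC function table through which the common habanalabs core drives the
 * Gaudi2-specific code. NULL entries are operations for which Gaudi2 provides
 * no ASIC-specific implementation.
 */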
10631 static const struct hl_asic_funcs gaudi2_funcs = {
10632 	.early_init = gaudi2_early_init,
10633 	.early_fini = gaudi2_early_fini,
10634 	.late_init = gaudi2_late_init,
10635 	.late_fini = gaudi2_late_fini,
10636 	.sw_init = gaudi2_sw_init,
10637 	.sw_fini = gaudi2_sw_fini,
10638 	.hw_init = gaudi2_hw_init,
10639 	.hw_fini = gaudi2_hw_fini,
10640 	.halt_engines = gaudi2_halt_engines,
10641 	.suspend = gaudi2_suspend,
10642 	.resume = gaudi2_resume,
10643 	.mmap = gaudi2_mmap,
10644 	.ring_doorbell = gaudi2_ring_doorbell,
10645 	.pqe_write = gaudi2_pqe_write,
10646 	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
10647 	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
10648 	.scrub_device_mem = gaudi2_scrub_device_mem,
10649 	.scrub_device_dram = gaudi2_scrub_device_dram,
10650 	.get_int_queue_base = NULL,
10651 	.test_queues = gaudi2_test_queues,
10652 	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
10653 	.asic_dma_pool_free = gaudi2_dma_pool_free,
10654 	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
10655 	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
10656 	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
10657 	.asic_dma_map_single = gaudi2_dma_map_single,
10658 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
10659 	.cs_parser = gaudi2_cs_parser,
10660 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
10661 	.add_end_of_cb_packets = NULL,
10662 	.update_eq_ci = gaudi2_update_eq_ci,
10663 	.context_switch = gaudi2_context_switch,
10664 	.restore_phase_topology = gaudi2_restore_phase_topology,
10665 	.debugfs_read_dma = gaudi2_debugfs_read_dma,
10666 	.add_device_attr = gaudi2_add_device_attr,
10667 	.handle_eqe = gaudi2_handle_eqe,
10668 	.get_events_stat = gaudi2_get_events_stat,
10669 	.read_pte = NULL,
10670 	.write_pte = NULL,
10671 	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
10672 	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
10673 	.mmu_prefetch_cache_range = NULL,
10674 	.send_heartbeat = gaudi2_send_heartbeat,
10675 	.debug_coresight = gaudi2_debug_coresight,
10676 	.is_device_idle = gaudi2_is_device_idle,
10677 	.compute_reset_late_init = gaudi2_compute_reset_late_init,
10678 	.hw_queues_lock = gaudi2_hw_queues_lock,
10679 	.hw_queues_unlock = gaudi2_hw_queues_unlock,
10680 	.get_pci_id = gaudi2_get_pci_id,
10681 	.get_eeprom_data = gaudi2_get_eeprom_data,
10682 	.get_monitor_dump = gaudi2_get_monitor_dump,
10683 	.send_cpu_message = gaudi2_send_cpu_message,
10684 	.pci_bars_map = gaudi2_pci_bars_map,
10685 	.init_iatu = gaudi2_init_iatu,
10686 	.rreg = hl_rreg,
10687 	.wreg = hl_wreg,
10688 	.halt_coresight = gaudi2_halt_coresight,
10689 	.ctx_init = gaudi2_ctx_init,
10690 	.ctx_fini = gaudi2_ctx_fini,
10691 	.pre_schedule_cs = gaudi2_pre_schedule_cs,
10692 	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
10693 	.load_firmware_to_device = NULL,
10694 	.load_boot_fit_to_device = NULL,
10695 	.get_signal_cb_size = gaudi2_get_signal_cb_size,
10696 	.get_wait_cb_size = gaudi2_get_wait_cb_size,
10697 	.gen_signal_cb = gaudi2_gen_signal_cb,
10698 	.gen_wait_cb = gaudi2_gen_wait_cb,
10699 	.reset_sob = gaudi2_reset_sob,
10700 	.reset_sob_group = gaudi2_reset_sob_group,
10701 	.get_device_time = gaudi2_get_device_time,
10702 	.pb_print_security_errors = gaudi2_pb_print_security_errors,
10703 	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
10704 	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
10705 	.get_dec_base_addr = gaudi2_get_dec_base_addr,
10706 	.scramble_addr = gaudi2_mmu_scramble_addr,
10707 	.descramble_addr = gaudi2_mmu_descramble_addr,
10708 	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
10709 	.get_hw_block_id = gaudi2_get_hw_block_id,
10710 	.hw_block_mmap = gaudi2_block_mmap,
10711 	.enable_events_from_fw = gaudi2_enable_events_from_fw,
10712 	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
10713 	.get_msi_info = gaudi2_get_msi_info,
10714 	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
10715 	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
10716 	.init_firmware_loader = gaudi2_init_firmware_loader,
10717 	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
10718 	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = gaudi2_get_sob_addr,
10720 	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
10721 	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
10722 	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
10723 	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
10724 	.access_dev_mem = hl_access_dev_mem,
10725 	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
10726 	.set_engine_cores = gaudi2_set_engine_cores,
10727 	.send_device_activity = gaudi2_send_device_activity,
10728 	.set_dram_properties = gaudi2_set_dram_properties,
10729 	.set_binning_masks = gaudi2_set_binning_masks,
10730 };
10731 
10732 void gaudi2_set_asic_funcs(struct hl_device *hdev)
10733 {
10734 	hdev->asic_funcs = &gaudi2_funcs;
10735 }
10736