xref: /openbmc/linux/drivers/accel/habanalabs/gaudi2/gaudi2.c (revision e65e175b07bef5974045cc42238de99057669ca7)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2020-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/hw_ip/mmu/mmu_general.h"
11 #include "../include/hw_ip/mmu/mmu_v2_0.h"
12 #include "../include/gaudi2/gaudi2_packets.h"
13 #include "../include/gaudi2/gaudi2_reg_map.h"
14 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
15 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
16 
17 #include <linux/module.h>
18 #include <linux/pci.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 
22 #define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */
23 
24 #define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
25 #define GAUDI2_RESET_POLL_TIMEOUT_USEC		50000		/* 50ms */
26 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
27 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
28 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
29 #define GAUDI2_RESET_POLL_CNT			3
30 #define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
31 #define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
32 #define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
33 #define GAUDI2_CB_POOL_CB_CNT			512
34 #define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
35 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
36 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
37 #define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
38 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
39 
40 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3
41 
42 /*
43  * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
44  * and it relies on that value (e.g. for array sizes), we define another value for the
45  * maximum number of faulty TPCs that reflects the cluster binning requirements.
46  */
47 #define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
48 #define MAX_FAULTY_XBARS			1
49 #define MAX_FAULTY_EDMAS			1
50 #define MAX_FAULTY_DECODERS			1
51 
52 #define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
53 #define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
54 #define GAUDI2_DECODER_FULL_MASK		0x3FF
55 
56 #define GAUDI2_NA_EVENT_CAUSE			0xFF
57 #define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
58 #define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
59 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
60 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
61 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
62 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
63 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
64 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE		30
65 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
66 #define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
67 #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
68 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
69 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
70 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
71 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
72 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
73 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
74 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
75 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
76 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
77 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5
78 
79 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
80 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
81 #define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)
82 
83 #define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
84 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)
85 
86 #define KDMA_TIMEOUT_USEC			USEC_PER_SEC
87 
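/* Idle-check helpers for the DMA cores, MME, TPC and QMAN blocks, used to decide whether an
 * engine is idle based on its status registers.
 */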
88 #define IS_DMA_IDLE(dma_core_idle_ind_mask)	\
89 	(!((dma_core_idle_ind_mask) &		\
90 	((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \
91 	(DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK))))
92 
93 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
94 
95 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))
96 
97 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
98 	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
99 	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
100 	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))
101 
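/* Decoder idle check: a decoder is considered idle when its SW work state is IDLE or PEND. */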
102 #define PCIE_DEC_EN_MASK			0x300
103 #define DEC_WORK_STATE_IDLE			0
104 #define DEC_WORK_STATE_PEND			3
105 #define IS_DEC_IDLE(dec_swreg15) \
106 	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
107 	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)
108 
109 /* HBM MMU address scrambling parameters */
110 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
111 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
112 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
113 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
114 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
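/* Bit offsets of the fields in an MMU range-invalidation request (enable, ASID enable, ASID,
 * and the VA LSB/MSB fields).
 */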
115 #define MMU_RANGE_INV_VA_LSB_SHIFT		12
116 #define MMU_RANGE_INV_VA_MSB_SHIFT		44
117 #define MMU_RANGE_INV_EN_SHIFT			0
118 #define MMU_RANGE_INV_ASID_EN_SHIFT		1
119 #define MMU_RANGE_INV_ASID_SHIFT		2
120 
121 /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU because
122  * it has only a 2-entry FIFO, and hence it is not enabled for it.
123  */
124 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
125 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
126 
127 #define GAUDI2_MAX_STRING_LEN			64
128 
129 #define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
130 							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
131 
132 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
133 
134 enum hl_pmmu_fatal_cause {
135 	LATENCY_RD_OUT_FIFO_OVERRUN,
136 	LATENCY_WR_OUT_FIFO_OVERRUN,
137 };
138 
139 enum hl_pcie_drain_ind_cause {
140 	LBW_AXI_DRAIN_IND,
141 	HBW_AXI_DRAIN_IND
142 };
143 
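/* Per HBM cluster, the bitmask of HMMU HIFs enabled for that cluster. */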
144 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
145 	[HBM_ID0] = 0xFFFC,
146 	[HBM_ID1] = 0xFFCF,
147 	[HBM_ID2] = 0xF7F7,
148 	[HBM_ID3] = 0x7F7F,
149 	[HBM_ID4] = 0xFCFF,
150 	[HBM_ID5] = 0xCFFF,
151 };
152 
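/* HBM cluster associated with each XBAR edge. */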
153 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
154 	[0] = HBM_ID0,
155 	[1] = HBM_ID1,
156 	[2] = HBM_ID4,
157 	[3] = HBM_ID5,
158 };
159 
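/* HBM cluster associated with each EDMA instance. */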
160 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
161 	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
162 	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
163 	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
164 	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
165 	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
166 	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
167 	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
168 	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
169 };
170 
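/* Async event ID reported by the QMAN that owns each HW queue (the four queues of a QMAN share
 * the same event ID).
 */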
171 static const int gaudi2_qman_async_event_id[] = {
172 	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
173 	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
174 	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
175 	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
176 	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
177 	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
178 	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
179 	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
180 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
181 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
182 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
183 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
184 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
185 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
186 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
187 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
188 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
189 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
190 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
191 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
192 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
193 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
194 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
195 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
196 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
197 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
198 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
199 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
200 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
201 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
202 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
203 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
204 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
205 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
206 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
207 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
208 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
209 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
210 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
211 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
212 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
213 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
214 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
215 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
216 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
217 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
218 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
219 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
220 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
221 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
222 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
223 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
224 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
225 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
226 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
227 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
228 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
229 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
230 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
231 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
232 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
233 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
234 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
235 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
236 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
237 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
238 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
239 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
240 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
241 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
242 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
243 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
244 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
245 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
246 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
247 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
248 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
249 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
250 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
251 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
252 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
253 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
254 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
255 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
256 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
257 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
258 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
259 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
260 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
261 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
262 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
263 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
264 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
265 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
266 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
267 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
268 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
269 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
270 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
271 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
272 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
273 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
274 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
275 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
276 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
277 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
278 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
279 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
280 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
281 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
282 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
283 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
284 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
285 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
286 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
287 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
288 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
289 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
290 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
291 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
292 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
293 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
294 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
295 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
296 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
297 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
298 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
299 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
300 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
301 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
302 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
303 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
304 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
305 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
306 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
307 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
308 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
309 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
310 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
311 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
312 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
313 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
314 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
315 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
316 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
317 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
318 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
319 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
320 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
321 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
322 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
323 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
324 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
325 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
326 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
327 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
328 	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
329 	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
330 	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
331 	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
332 	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
333 	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
334 	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
335 	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
336 	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
337 	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
338 	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
339 	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
340 	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
341 	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
342 	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
343 	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
344 	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
345 	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
346 	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
347 	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
348 	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
349 	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
350 	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
351 	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
352 	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
353 	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
354 	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
355 	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
356 	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
357 	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
358 	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
359 	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
360 	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
361 	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
362 	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
363 	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
364 	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
365 	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
366 	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
367 	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
368 	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
369 	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
370 	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
371 	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
372 	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
373 	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
374 	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
375 	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
376 	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
377 	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
378 	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
379 	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
380 	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
381 	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
382 	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
383 	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
384 	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
385 	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
386 	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
387 	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
388 	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
389 	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
390 	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
391 	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
392 	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
393 	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
394 	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
395 	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
396 	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
397 	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
398 	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
399 	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
400 	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
401 	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
402 	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
403 	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
404 	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
405 	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
406 	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
407 	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
408 	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
409 	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
410 	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
411 	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
412 	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
413 	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
414 	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
415 	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
416 	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
417 	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
418 	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
419 	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
420 	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
421 	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
422 	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
423 	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
424 	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
425 	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
426 	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
427 	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
428 	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
429 	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
430 	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
431 	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
432 };
433 
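/* Async event ID reported by each DMA core (EDMA, PDMA and KDMA). */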
434 static const int gaudi2_dma_core_async_event_id[] = {
435 	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
436 	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
437 	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
438 	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
439 	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
440 	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
441 	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
442 	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
443 	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
444 	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
445 	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
446 };
447 
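/* Human-readable names for the per-block error/interrupt cause bits, indexed by bit position. */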
448 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
449 	"qman sei intr",
450 	"arc sei intr"
451 };
452 
453 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
454 	"AXI_TERMINATOR WR",
455 	"AXI_TERMINATOR RD",
456 	"AXI SPLIT SEI Status"
457 };
458 
459 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
460 	"cbu_bresp_sei_intr_cause",
461 	"cbu_rresp_sei_intr_cause",
462 	"lbu_bresp_sei_intr_cause",
463 	"lbu_rresp_sei_intr_cause",
464 	"cbu_axi_split_intr_cause",
465 	"lbu_axi_split_intr_cause",
466 	"arc_ip_excptn_sei_intr_cause",
467 	"dmi_bresp_sei_intr_cause",
468 	"aux2apb_err_sei_intr_cause",
469 	"cfg_lbw_wr_terminated_intr_cause",
470 	"cfg_lbw_rd_terminated_intr_cause",
471 	"cfg_dccm_wr_terminated_intr_cause",
472 	"cfg_dccm_rd_terminated_intr_cause",
473 	"cfg_hbw_rd_terminated_intr_cause"
474 };
475 
476 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
477 	"msix_vcd_hbw_sei",
478 	"msix_l2c_hbw_sei",
479 	"msix_nrm_hbw_sei",
480 	"msix_abnrm_hbw_sei",
481 	"msix_vcd_lbw_sei",
482 	"msix_l2c_lbw_sei",
483 	"msix_nrm_lbw_sei",
484 	"msix_abnrm_lbw_sei",
485 	"apb_vcd_lbw_sei",
486 	"apb_l2c_lbw_sei",
487 	"apb_nrm_lbw_sei",
488 	"apb_abnrm_lbw_sei",
489 	"dec_sei",
490 	"dec_apb_sei",
491 	"trc_apb_sei",
492 	"lbw_mstr_if_sei",
493 	"axi_split_bresp_err_sei",
494 	"hbw_axi_wr_viol_sei",
495 	"hbw_axi_rd_viol_sei",
496 	"lbw_axi_wr_viol_sei",
497 	"lbw_axi_rd_viol_sei",
498 	"vcd_spi",
499 	"l2c_spi",
500 	"nrm_spi",
501 	"abnrm_spi",
502 };
503 
504 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
505 	"PQ AXI HBW error",
506 	"CQ AXI HBW error",
507 	"CP AXI HBW error",
508 	"CP error due to undefined OPCODE",
509 	"CP encountered STOP OPCODE",
510 	"CP AXI LBW error",
511 	"CP WRREG32 or WRBULK returned error",
512 	"N/A",
513 	"FENCE 0 inc over max value and clipped",
514 	"FENCE 1 inc over max value and clipped",
515 	"FENCE 2 inc over max value and clipped",
516 	"FENCE 3 inc over max value and clipped",
517 	"FENCE 0 dec under min value and clipped",
518 	"FENCE 1 dec under min value and clipped",
519 	"FENCE 2 dec under min value and clipped",
520 	"FENCE 3 dec under min value and clipped",
521 	"CPDMA Up overflow",
522 	"PQC L2H error"
523 };
524 
525 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
526 	"RSVD0",
527 	"CQ AXI HBW error",
528 	"CP AXI HBW error",
529 	"CP error due to undefined OPCODE",
530 	"CP encountered STOP OPCODE",
531 	"CP AXI LBW error",
532 	"CP WRREG32 or WRBULK returned error",
533 	"N/A",
534 	"FENCE 0 inc over max value and clipped",
535 	"FENCE 1 inc over max value and clipped",
536 	"FENCE 2 inc over max value and clipped",
537 	"FENCE 3 inc over max value and clipped",
538 	"FENCE 0 dec under min value and clipped",
539 	"FENCE 1 dec under min value and clipped",
540 	"FENCE 2 dec under min value and clipped",
541 	"FENCE 3 dec under min value and clipped",
542 	"CPDMA Up overflow",
543 	"RSVD17",
544 	"CQ_WR_IFIFO_CI_ERR",
545 	"CQ_WR_CTL_CI_ERR",
546 	"ARC_CQF_RD_ERR",
547 	"ARC_CQ_WR_IFIFO_CI_ERR",
548 	"ARC_CQ_WR_CTL_CI_ERR",
549 	"ARC_AXI_ERR",
550 	"CP_SWITCH_WDT_ERR"
551 };
552 
553 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
554 	"Choice push while full error",
555 	"Choice Q watchdog error",
556 	"MSG AXI LBW returned with error"
557 };
558 
559 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
560 	"qm_axi_err",
561 	"qm_trace_fence_events",
562 	"qm_sw_err",
563 	"qm_cp_sw_stop",
564 	"lbw_mstr_rresp_err",
565 	"lbw_mstr_bresp_err",
566 	"lbw_msg_slverr",
567 	"hbw_msg_slverr",
568 	"wbc_slverr",
569 	"hbw_mstr_rresp_err",
570 	"hbw_mstr_bresp_err",
571 	"sb_resp_intr",
572 	"mrsb_resp_intr",
573 	"core_dw_status_0",
574 	"core_dw_status_1",
575 	"core_dw_status_2",
576 	"core_dw_status_3",
577 	"core_dw_status_4",
578 	"core_dw_status_5",
579 	"core_dw_status_6",
580 	"core_dw_status_7",
581 	"async_arc2cpu_sei_intr",
582 };
583 
584 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
585 	"tpc_address_exceed_slm",
586 	"tpc_div_by_0",
587 	"tpc_spu_mac_overflow",
588 	"tpc_spu_addsub_overflow",
589 	"tpc_spu_abs_overflow",
590 	"tpc_spu_fma_fp_dst_nan",
591 	"tpc_spu_fma_fp_dst_inf",
592 	"tpc_spu_convert_fp_dst_nan",
593 	"tpc_spu_convert_fp_dst_inf",
594 	"tpc_spu_fp_dst_denorm",
595 	"tpc_vpu_mac_overflow",
596 	"tpc_vpu_addsub_overflow",
597 	"tpc_vpu_abs_overflow",
598 	"tpc_vpu_convert_fp_dst_nan",
599 	"tpc_vpu_convert_fp_dst_inf",
600 	"tpc_vpu_fma_fp_dst_nan",
601 	"tpc_vpu_fma_fp_dst_inf",
602 	"tpc_vpu_fp_dst_denorm",
603 	"tpc_assertions",
604 	"tpc_illegal_instruction",
605 	"tpc_pc_wrap_around",
606 	"tpc_qm_sw_err",
607 	"tpc_hbw_rresp_err",
608 	"tpc_hbw_bresp_err",
609 	"tpc_lbw_rresp_err",
610 	"tpc_lbw_bresp_err",
611 	"st_unlock_already_locked",
612 	"invalid_lock_access",
613 	"LD_L protection violation",
614 	"ST_L protection violation",
615 };
616 
617 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
618 	"agu_resp_intr",
619 	"qman_axi_err",
620 	"wap sei (wbc axi err)",
621 	"arc sei",
622 	"cfg access error",
623 	"qm_sw_err",
624 	"sbte_dbg_intr_0",
625 	"sbte_dbg_intr_1",
626 	"sbte_dbg_intr_2",
627 	"sbte_dbg_intr_3",
628 	"sbte_dbg_intr_4",
629 	"sbte_prtn_intr_0",
630 	"sbte_prtn_intr_1",
631 	"sbte_prtn_intr_2",
632 	"sbte_prtn_intr_3",
633 	"sbte_prtn_intr_4",
634 };
635 
636 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
637 	"i0",
638 	"i1",
639 	"i2",
640 	"i3",
641 	"i4",
642 };
643 
644 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
645 	"WBC ERR RESP_0",
646 	"WBC ERR RESP_1",
647 	"AP SOURCE POS INF",
648 	"AP SOURCE NEG INF",
649 	"AP SOURCE NAN",
650 	"AP RESULT POS INF",
651 	"AP RESULT NEG INF",
652 };
653 
654 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
655 	"HBW Read returned with error RRESP",
656 	"HBW write returned with error BRESP",
657 	"LBW write returned with error BRESP",
658 	"descriptor_fifo_overflow",
659 	"KDMA SB LBW Read returned with error",
660 	"KDMA WBC LBW Write returned with error",
661 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
662 	"WRONG CFG FOR COMMIT IN LIN DMA"
663 };
664 
665 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
666 	"HBW/LBW Read returned with error RRESP",
667 	"HBW/LBW write returned with error BRESP",
668 	"LBW write returned with error BRESP",
669 	"descriptor_fifo_overflow",
670 	"KDMA SB LBW Read returned with error",
671 	"KDMA WBC LBW Write returned with error",
672 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
673 	"WRONG CFG FOR COMMIT IN LIN DMA"
674 };
675 
676 struct gaudi2_sm_sei_cause_data {
677 	const char *cause_name;
678 	const char *log_name;
679 	u32 log_mask;
680 };
681 
682 static const struct gaudi2_sm_sei_cause_data
683 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
684 	{"calculated SO value overflow/underflow", "SOB group ID", 0x7FF},
685 	{"payload address of monitor is not aligned to 4B", "monitor addr", 0xFFFF},
686 	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id", 0xFFFF},
687 };
688 
689 static const char * const
690 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
691 	"LATENCY_RD_OUT_FIFO_OVERRUN",
692 	"LATENCY_WR_OUT_FIFO_OVERRUN",
693 };
694 
695 static const char * const
696 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
697 	"LATENCY_RD_OUT_FIFO_OVERRUN",
698 	"LATENCY_WR_OUT_FIFO_OVERRUN",
699 };
700 
701 static const char * const
702 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
703 	"AXI drain HBW",
704 	"AXI drain LBW",
705 };
706 
707 static const char * const
708 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
709 	"HBW error response",
710 	"LBW error response",
711 	"TLP is blocked by RR"
712 };
713 
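/* Configuration-space base address of the QMAN block that serves each HW queue. */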
714 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
715 	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
716 	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
717 	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
718 	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
719 	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
720 	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
721 	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
722 	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
723 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
724 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
725 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
726 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
727 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
728 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
729 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
730 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
731 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
732 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
733 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
734 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
735 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
736 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
737 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
738 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
739 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
740 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
741 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
742 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
743 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
744 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
745 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
746 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
747 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
748 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
749 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
750 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
751 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
752 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
753 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
754 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
755 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
756 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
757 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
758 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
759 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
760 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
761 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
762 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
763 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
764 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
765 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
766 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
767 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
768 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
769 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
770 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
771 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
772 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
773 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
774 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
775 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
776 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
777 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
778 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
779 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
780 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
781 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
782 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
783 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
784 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
785 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
786 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
787 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
788 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
789 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
790 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
791 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
792 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
793 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
794 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
795 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
796 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
797 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
798 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
799 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
800 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
801 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
802 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
803 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
804 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
805 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
806 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
807 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
808 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
809 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
810 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
811 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
812 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
813 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
814 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
815 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
816 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
817 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
818 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
819 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
820 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
821 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
822 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
823 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
824 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
825 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
826 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
827 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
828 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
829 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
830 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
831 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
832 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
833 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
834 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
835 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
836 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
837 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
838 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
839 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
840 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
841 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
842 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
843 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
844 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
845 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
846 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
847 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
848 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
849 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
850 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
851 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
852 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
853 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
854 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
855 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
856 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
857 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
858 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
859 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
860 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
861 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
862 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
863 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
864 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
865 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
866 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
867 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
868 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
869 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
870 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
871 	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
872 	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
873 	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
874 	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
875 	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
876 	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
877 	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
878 	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
879 	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
880 	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
881 	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
882 	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
883 	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
884 	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
885 	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
886 	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
887 	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
888 	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
889 	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
890 	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
891 	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
892 	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
893 	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
894 	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
895 	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
896 	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
897 	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
898 	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
899 	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
900 	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
901 	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
902 	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
903 	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
904 	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
905 	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
906 	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
907 	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
908 	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
909 	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
910 	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
911 	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
912 	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
913 	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
914 	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
915 	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
916 	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
917 	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
918 	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
919 	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
920 	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
921 	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
922 	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
923 	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
924 	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
925 	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
926 	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
927 	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
928 	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
929 	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
930 	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
931 	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
932 	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
933 	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
934 	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
935 	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
936 	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
937 	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
938 	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
939 	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
940 	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
941 	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
942 	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
943 	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
944 	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
945 	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
946 	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
947 	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
948 	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
949 	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
950 	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
951 	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
952 	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
953 	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
954 	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
955 	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
956 	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
957 	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
958 	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
959 	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
960 	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
961 	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
962 	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
963 	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
964 	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
965 	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
966 	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
967 	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
968 	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
969 	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
970 	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
971 	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
972 	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
973 	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
974 	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
975 };
976 
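/* AUX register block base address for each ARC CPU. */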
977 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
978 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
979 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
980 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
981 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
982 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
983 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
984 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
985 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
986 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
987 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
988 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
989 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
990 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
991 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
992 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
993 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
994 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
995 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
996 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
997 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
998 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
999 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
1000 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1001 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1002 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1003 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1004 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1005 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1006 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1007 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1008 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1009 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1010 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1011 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1012 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1013 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1014 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1015 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1016 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1017 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1018 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1019 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1020 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1021 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1022 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1023 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1024 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1025 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1026 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1027 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1028 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1029 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1030 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1031 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1032 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1033 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1034 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1035 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1036 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1037 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1038 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1039 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1040 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1041 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1042 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1043 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1044 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1045 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1046 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1047 };
1048 
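/* DCCM (data closely-coupled memory) base address for each ARC CPU. */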
1049 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1050 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1051 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1052 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1053 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1054 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1055 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1056 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1057 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1058 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1059 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1060 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1061 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1062 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1063 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1064 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1065 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1066 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1067 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1068 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1069 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1070 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1071 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1072 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1073 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1074 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1075 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1076 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1077 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1078 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1079 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1080 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1081 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1082 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1083 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1084 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1085 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1086 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1087 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1088 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1089 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1090 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1091 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1092 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1093 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1094 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1095 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1096 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1097 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1098 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1099 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1100 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1101 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1102 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1103 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1104 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1105 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1106 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1107 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1108 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1109 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1110 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1111 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1112 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1113 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1114 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1115 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1116 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1117 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1118 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1119 };
1120 
1121 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1122 	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1123 	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1124 	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1125 	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1126 };
1127 
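/* ARC CPU that manages each HW queue. */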
1128 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1129 	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1130 	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1131 	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1132 	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1133 	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1134 	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1135 	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1136 	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1137 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1138 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1139 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1140 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1141 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1142 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1143 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1144 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1145 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1146 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1147 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1148 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1149 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1150 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1151 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1152 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1153 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1154 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1155 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1156 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1157 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1158 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1159 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1160 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1161 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1162 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1163 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1164 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1165 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1166 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1167 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1168 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1169 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1170 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1171 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1172 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1173 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1174 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1175 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1176 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1177 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1178 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1179 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1180 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1181 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1182 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1183 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1184 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1185 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1186 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1187 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1188 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1189 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1190 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1191 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1192 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1193 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1194 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1195 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1196 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1197 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1198 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1199 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1200 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1201 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1202 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1203 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1204 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1205 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1206 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1207 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1208 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1209 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1210 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1211 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1212 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1213 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1214 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1215 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1216 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1217 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1218 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1219 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1220 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1221 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1222 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1223 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1224 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1225 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1226 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1227 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1228 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1229 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1230 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1231 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1232 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1233 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1234 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1235 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1236 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1237 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1238 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1239 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1240 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1241 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1242 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1243 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1244 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1245 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1246 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1247 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1248 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1249 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1250 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1251 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1252 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1253 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1254 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1255 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1256 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1257 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1258 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1259 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1260 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1261 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1262 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1263 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1264 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1265 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1266 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1267 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1268 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1269 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1270 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1271 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1272 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1273 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1274 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1275 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1276 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1277 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1278 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1279 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1280 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1281 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1282 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1283 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1284 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1285 	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1286 	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1287 	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1288 	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1289 	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1290 	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1291 	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1292 	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1293 	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1294 	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1295 	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1296 	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1297 	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1298 	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1299 	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1300 	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1301 	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1302 	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1303 	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1304 	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1305 	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1306 	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1307 	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1308 	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1309 	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1310 	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1311 	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1312 	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1313 	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1314 	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1315 	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1316 	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1317 	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1318 	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1319 	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1320 	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1321 	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1322 	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1323 	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1324 	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1325 	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1326 	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1327 	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1328 	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1329 	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1330 	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1331 	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1332 	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1333 	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1334 	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1335 	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1336 	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1337 	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1338 	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1339 	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1340 	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1341 	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1342 	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1343 	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1344 	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1345 	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1346 	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1347 	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1348 	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1349 	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1350 	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1351 	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1352 	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1353 	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1354 	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1355 	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1356 	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1357 	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1358 	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1359 	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1360 	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1361 	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1362 	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1363 	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1364 	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1365 	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1366 	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1367 	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1368 	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1369 	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1370 	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1371 	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1372 	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1373 	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1374 	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1375 	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1376 	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1377 	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1378 	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1379 	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1380 	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1381 	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1382 	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1383 	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1384 	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1385 	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1386 	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1387 	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1388 	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1389 };
1390 
1391 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1392 	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1393 	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1394 	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1395 	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1396 	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1397 	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1398 	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1399 	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1400 	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1401 	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1402 	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1403 };
1404 
1405 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1406 	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1407 	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1408 	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1409 	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1410 };
1411 
1412 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1413 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1414 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1415 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1416 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1417 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1418 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1419 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1420 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1421 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1422 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1423 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1424 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1425 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1426 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1427 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1428 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1429 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1430 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1431 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1432 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1433 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1434 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1435 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1436 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1437 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1438 };
1439 
1440 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1441 	[ROTATOR_ID_0] = mmROT0_BASE,
1442 	[ROTATOR_ID_1] = mmROT1_BASE
1443 };
1444 
1445 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1446 	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1447 	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1448 	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1449 	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1450 	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1451 	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1452 	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1453 	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1454 	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1455 	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1456 	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1457 	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1458 	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1459 	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1460 	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1461 	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1462 	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1463 	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1464 	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1465 	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1466 	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1467 	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1468 	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1469 	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1470 	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1471 };
1472 
1473 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1474 	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1475 	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1476 };
1477 
1478 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1479 	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1480 	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1481 	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1482 	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1483 	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1484 	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1485 	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1486 	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1487 };
1488 
1489 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1490 	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1491 	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1492 	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1493 	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1494 	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1495 	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1496 	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1497 	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1498 	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1499 	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1500 };
1501 
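/*
 * Router ID per logical router sequence (see enum rtr_id below), encoded from
 * the router's X/Y grid coordinates. Entries 24-27 have no matching router and
 * hold a (0, 0) placeholder.
 */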
1502 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = {
1503 	RTR_ID_X_Y(2, 4),
1504 	RTR_ID_X_Y(3, 4),
1505 	RTR_ID_X_Y(4, 4),
1506 	RTR_ID_X_Y(5, 4),
1507 	RTR_ID_X_Y(6, 4),
1508 	RTR_ID_X_Y(7, 4),
1509 	RTR_ID_X_Y(8, 4),
1510 	RTR_ID_X_Y(9, 4),
1511 	RTR_ID_X_Y(10, 4),
1512 	RTR_ID_X_Y(11, 4),
1513 	RTR_ID_X_Y(12, 4),
1514 	RTR_ID_X_Y(13, 4),
1515 	RTR_ID_X_Y(14, 4),
1516 	RTR_ID_X_Y(15, 4),
1517 	RTR_ID_X_Y(16, 4),
1518 	RTR_ID_X_Y(17, 4),
1519 	RTR_ID_X_Y(2, 11),
1520 	RTR_ID_X_Y(3, 11),
1521 	RTR_ID_X_Y(4, 11),
1522 	RTR_ID_X_Y(5, 11),
1523 	RTR_ID_X_Y(6, 11),
1524 	RTR_ID_X_Y(7, 11),
1525 	RTR_ID_X_Y(8, 11),
1526 	RTR_ID_X_Y(9, 11),
1527 	RTR_ID_X_Y(0, 0),/* 24 no id */
1528 	RTR_ID_X_Y(0, 0),/* 25 no id */
1529 	RTR_ID_X_Y(0, 0),/* 26 no id */
1530 	RTR_ID_X_Y(0, 0),/* 27 no id */
1531 	RTR_ID_X_Y(14, 11),
1532 	RTR_ID_X_Y(15, 11),
1533 	RTR_ID_X_Y(16, 11),
1534 	RTR_ID_X_Y(17, 11)
1535 };
1536 
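/* Logical router index: 8 routers per DCORE across 4 DCOREs */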
1537 enum rtr_id {
1538 	DCORE0_RTR0,
1539 	DCORE0_RTR1,
1540 	DCORE0_RTR2,
1541 	DCORE0_RTR3,
1542 	DCORE0_RTR4,
1543 	DCORE0_RTR5,
1544 	DCORE0_RTR6,
1545 	DCORE0_RTR7,
1546 	DCORE1_RTR0,
1547 	DCORE1_RTR1,
1548 	DCORE1_RTR2,
1549 	DCORE1_RTR3,
1550 	DCORE1_RTR4,
1551 	DCORE1_RTR5,
1552 	DCORE1_RTR6,
1553 	DCORE1_RTR7,
1554 	DCORE2_RTR0,
1555 	DCORE2_RTR1,
1556 	DCORE2_RTR2,
1557 	DCORE2_RTR3,
1558 	DCORE2_RTR4,
1559 	DCORE2_RTR5,
1560 	DCORE2_RTR6,
1561 	DCORE2_RTR7,
1562 	DCORE3_RTR0,
1563 	DCORE3_RTR1,
1564 	DCORE3_RTR2,
1565 	DCORE3_RTR3,
1566 	DCORE3_RTR4,
1567 	DCORE3_RTR5,
1568 	DCORE3_RTR6,
1569 	DCORE3_RTR7,
1570 };
1571 
1572 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1573 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1574 	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1575 	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1576 	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1577 	DCORE0_RTR0
1578 };
1579 
1580 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = {
1581 	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1582 	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1583 };
1584 
1585 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = {
1586 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1587 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1588 };
1589 
1590 struct sft_info {
1591 	u8 interface_id;
1592 	u8 dcore_id;
1593 };
1594 
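/* SFT interface ID and DCORE ID of each EDMA initiator */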
1595 static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1596 	{0, 0},	{1, 0}, {0, 1}, {1, 1}, {1, 2}, {1, 3},	{0, 2},	{0, 3},
1597 };
1598 
1599 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = {
1600 	DCORE0_RTR0, DCORE0_RTR0
1601 };
1602 
1603 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = {
1604 	DCORE2_RTR0, DCORE3_RTR7
1605 };
1606 
1607 struct mme_initiators_rtr_id {
1608 	u32 wap0;
1609 	u32 wap1;
1610 	u32 write;
1611 	u32 read;
1612 	u32 sbte0;
1613 	u32 sbte1;
1614 	u32 sbte2;
1615 	u32 sbte3;
1616 	u32 sbte4;
1617 };
1618 
1619 enum mme_initiators {
1620 	MME_WAP0 = 0,
1621 	MME_WAP1,
1622 	MME_WRITE,
1623 	MME_READ,
1624 	MME_SBTE0,
1625 	MME_SBTE1,
1626 	MME_SBTE2,
1627 	MME_SBTE3,
1628 	MME_SBTE4,
1629 	MME_INITIATORS_MAX
1630 };
1631 
1632 static const struct mme_initiators_rtr_id
1633 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1634 	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1635 	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1636 	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1637 	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1638 	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1639 	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1640 	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1641 	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1642 };
1643 
1644 enum razwi_event_sources {
1645 	RAZWI_TPC,
1646 	RAZWI_MME,
1647 	RAZWI_EDMA,
1648 	RAZWI_PDMA,
1649 	RAZWI_NIC,
1650 	RAZWI_DEC,
1651 	RAZWI_ROT
1652 };
1653 
1654 struct hbm_mc_error_causes {
1655 	u32 mask;
1656 	char cause[50];
1657 };
1658 
1659 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
1660 	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
1661 	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
1662 	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
1663 	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
1664 	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
1665 };
1666 
1667 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
1668 	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
1669 	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
1670 	[HBM_SEI_READ_ERR] = "SEI read data error",
1671 	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
1672 	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
1673 	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
1674 	[HBM_SEI_DFI] = "SEI DFI error",
1675 	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
1676 	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
1677 };
1678 
1679 struct mmu_spi_sei_cause {
1680 	char cause[50];
1681 	int clear_bit;
1682 };
1683 
1684 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
1685 	{"page fault", 1},		/* INTERRUPT_CLR[1] */
1686 	{"page access", 1},		/* INTERRUPT_CLR[1] */
1687 	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
1688 	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
1689 	{"mmu rei0", -1},		/* no clear register bit */
1690 	{"mmu rei1", -1},		/* no clear register bit */
1691 	{"stlb rei0", -1},		/* no clear register bit */
1692 	{"stlb rei1", -1},		/* no clear register bit */
1693 	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
1694 	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
1695 	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
1696 	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
1697 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1698 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1699 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1700 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1701 	{"slave error", 16},		/* INTERRUPT_CLR[16] */
1702 	{"dec error", 17},		/* INTERRUPT_CLR[17] */
1703 	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
1704 };
1705 
1706 struct gaudi2_cache_invld_params {
1707 	u64 start_va;
1708 	u64 end_va;
1709 	u32 inv_start_val;
1710 	u32 flags;
1711 	bool range_invalidation;
1712 };
1713 
1714 struct gaudi2_tpc_idle_data {
1715 	struct engines_data *e;
1716 	unsigned long *mask;
1717 	bool *is_idle;
1718 	const char *tpc_fmt;
1719 };
1720 
1721 struct gaudi2_tpc_mmu_data {
1722 	u32 rw_asid;
1723 };
1724 
1725 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
1726 
1727 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
1728 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
1729 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
1730 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1731 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1732 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
1733 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
1734 										bool is_memset);
1735 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
1736 
1737 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
1738 {
1739 
1740 }
1741 
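/*
 * A signal CB consists of a single MSG_SHORT packet, while a wait CB consists
 * of four MSG_SHORT packets plus a FENCE packet.
 */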
1742 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
1743 {
1744 	return sizeof(struct packet_msg_short);
1745 }
1746 
1747 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
1748 {
1749 	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
1750 }
1751 
1752 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
1753 {
1754 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1755 	int dcore, inst, tpc_seq;
1756 	u32 offset;
1757 
1758 	/* init the return code */
1759 	ctx->rc = 0;
1760 
1761 	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
1762 		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
1763 			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
1764 
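			/* skip TPCs that are not enabled (e.g. binned out) */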
1765 			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
1766 				continue;
1767 
1768 			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
1769 
1770 			ctx->fn(hdev, dcore, inst, offset, ctx);
1771 			if (ctx->rc) {
1772 				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
1773 							dcore, inst);
1774 				return;
1775 			}
1776 		}
1777 	}
1778 
1779 	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
1780 		return;
1781 
1782 	/* special check for PCI TPC (DCORE0_TPC6) */
1783 	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
1784 	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
1785 	if (ctx->rc)
1786 		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
1787 }
1788 
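/*
 * A host physical address is considered valid if it falls below the end of
 * range 0 (HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) or at/above the start of
 * range 1 (HOST_PHYS_BASE_1).
 */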
1789 static bool gaudi2_host_phys_addr_valid(u64 addr)
1790 {
1791 	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
1792 		return true;
1793 
1794 	return false;
1795 }
1796 
1797 static int set_number_of_functional_hbms(struct hl_device *hdev)
1798 {
1799 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1800 	u8 faulty_hbms = hweight64(hdev->dram_binning);
1801 
1802 	/* check if all HBMs should be used */
1803 	if (!faulty_hbms) {
1804 		dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
1805 		prop->num_functional_hbms = GAUDI2_HBM_NUM;
1806 		return 0;
1807 	}
1808 
1809 	/*
1810 	 * Check for the error condition in which the number of binning
1811 	 * candidates is higher than the maximum supported by the
1812 	 * driver, in which case the binning mask is rejected and an
1813 	 * error is returned.
1814 	 */
1815 	if (faulty_hbms > MAX_FAULTY_HBMS) {
1816 		dev_err(hdev->dev,
1817 			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
1818 			MAX_FAULTY_HBMS, hdev->dram_binning);
1819 		return -EINVAL;
1820 	}
1821 
1822 	/*
1823 	 * When binning is in effect, the number of functional HBMs is the
1824 	 * total (GAUDI2_HBM_NUM) minus the number of faulty HBMs.
1825 	 */
1826 	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
1827 	return 0;
1828 }
1829 
1830 static int gaudi2_set_dram_properties(struct hl_device *hdev)
1831 {
1832 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1833 	u32 basic_hbm_page_size;
1834 	int rc;
1835 
1836 	rc = set_number_of_functional_hbms(hdev);
1837 	if (rc)
1838 		return -EINVAL;
1839 
1840 	/*
1841 	 * Due to a HW bug in which the TLB size is x16 smaller than expected, we use
1842 	 * a workaround of an x16 bigger page size so that the entire HBM mapping can
1843 	 * still be populated in the TLB
1844 	 */
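	/*
	 * For example, with all GAUDI2_HBM_NUM HBMs functional the basic page is
	 * GAUDI2_HBM_NUM * 8MB, which the x16 compensation factor below then scales up.
	 */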
1845 	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
1846 	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
1847 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
1848 	prop->dram_size = prop->num_functional_hbms * SZ_16G;
1849 	prop->dram_base_address = DRAM_PHYS_BASE;
1850 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
1851 	prop->dram_supports_virtual_memory = true;
1852 
1853 	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
1854 	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
1855 	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
1856 	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
1857 
1858 	/* Since the DRAM page size differs from the DMMU page size, we need to
1859 	 * allocate DRAM memory in units of dram_page_size and map this memory
1860 	 * in units of the DMMU page size. We overcome this size mismatch using
1861 	 * a scrambling routine which takes a DRAM page and converts it to a
1862 	 * DMMU page.
1863 	 * We therefore:
1864 	 * 1. partition the virtual address space into DRAM-page (whole) pages.
1865 	 *    (suppose we get n such pages)
1866 	 * 2. limit the amount of virtual address space we got from 1 above to
1867 	 *    a multiple of 64M, as we don't want the scrambled address to cross
1868 	 *    the DRAM virtual address space.
1869 	 *    (m = (n * DRAM_page_size) / DMMU_page_size)
1870 	 * 3. determine the end address accordingly:
1871 	 *    end_addr = start_addr + m * 48M
1872 	 *
1873 	 *    The DRAM address MSBs (63:48) are not part of the roundup calculation.
1874 	 */
1875 	prop->dmmu.start_addr = prop->dram_base_address +
1876 			(prop->dram_page_size *
1877 				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
1878 
1879 	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
1880 			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
1881 
1882 	return 0;
1883 }
1884 
1885 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
1886 {
1887 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1888 	struct hw_queue_properties *q_props;
1889 	u32 num_sync_stream_queues = 0;
1890 	int i;
1891 
1892 	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
1893 	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
1894 					GFP_KERNEL);
1895 
1896 	if (!prop->hw_queues_props)
1897 		return -ENOMEM;
1898 
1899 	q_props = prop->hw_queues_props;
1900 
1901 	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
1902 		q_props[i].type = QUEUE_TYPE_HW;
1903 		q_props[i].driver_only = 0;
1904 
1905 		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
1906 			q_props[i].supports_sync_stream = 0;
1907 		} else {
1908 			q_props[i].supports_sync_stream = 1;
1909 			num_sync_stream_queues++;
1910 		}
1911 
1912 		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
1913 	}
1914 
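	/* The CPU queue is the only driver-only queue and its CBs are allocated by the kernel */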
1915 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
1916 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
1917 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
1918 
1919 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
1920 	prop->cfg_base_address = CFG_BASE;
1921 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
1922 	prop->host_base_address = HOST_PHYS_BASE_0;
1923 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
1924 	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
1925 	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
1926 	prop->user_dec_intr_count = NUMBER_OF_DEC;
1927 	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
1928 	prop->completion_mode = HL_COMPLETION_MODE_CS;
1929 	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
1930 	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
1931 
1932 	prop->sram_base_address = SRAM_BASE_ADDR;
1933 	prop->sram_size = SRAM_SIZE;
1934 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
1935 	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
1936 
1937 	prop->hints_range_reservation = true;
1938 
1939 	if (hdev->pldm)
1940 		prop->mmu_pgt_size = 0x800000; /* 8MB */
1941 	else
1942 		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
1943 
1944 	prop->mmu_pte_size = HL_PTE_SIZE;
1945 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
1946 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
1947 
1948 	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
1949 	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
1950 	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
1951 	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
1952 	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
1953 	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
1954 	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
1955 	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
1956 	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
1957 	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
1958 	prop->dmmu.page_size = PAGE_SIZE_1GB;
1959 	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
1960 	prop->dmmu.last_mask = LAST_MASK;
1961 	prop->dmmu.host_resident = 1;
1962 	/* TODO: will be duplicated until implementing per-MMU props */
1963 	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
1964 	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1965 
1966 	/*
1967 	 * This is done in order to be able to validate the FW descriptor (i.e. to validate
1968 	 * that the addresses and the allocated space for the FW image do not cross memory
1969 	 * bounds). For this reason we set the DRAM size to the minimum possible, and later
1970 	 * it will be modified according to what is reported in the cpucp info packet.
1971 	 */
1972 	prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
1973 
1974 	hdev->pmmu_huge_range = true;
1975 	prop->pmmu.host_resident = 1;
1976 	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
1977 	prop->pmmu.last_mask = LAST_MASK;
1978 	/* TODO: will be duplicated until implementing per-MMU props */
1979 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
1980 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1981 
1982 	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
1983 	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
1984 	prop->hints_host_hpage_reserved_va_range.start_addr =
1985 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
1986 	prop->hints_host_hpage_reserved_va_range.end_addr =
1987 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
1988 
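	/*
	 * The PMMU hop shifts/masks depend on the host kernel page size: with 64KB
	 * pages we use the 64K layout (16MB huge pages), otherwise the 4K layout
	 * (2MB huge pages).
	 */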
1989 	if (PAGE_SIZE == SZ_64K) {
1990 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
1991 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
1992 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
1993 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
1994 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
1995 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
1996 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
1997 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
1998 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
1999 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2000 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2001 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2002 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2003 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2004 		prop->pmmu.page_size = PAGE_SIZE_64KB;
2005 
2006 		/* shifts and masks are the same in PMMU and HPMMU */
2007 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2008 		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2009 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2010 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2011 	} else {
2012 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2013 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2014 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2015 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2016 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2017 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2018 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2019 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2020 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2021 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2022 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2023 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2024 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2025 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2026 		prop->pmmu.page_size = PAGE_SIZE_4KB;
2027 
2028 		/* shifts and masks are the same in PMMU and HPMMU */
2029 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2030 		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2031 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2032 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2033 	}
2034 
2035 	prop->num_engine_cores = CPU_ID_MAX;
2036 	prop->cfg_size = CFG_SIZE;
2037 	prop->max_asid = MAX_ASID;
2038 	prop->num_of_events = GAUDI2_EVENT_SIZE;
2039 
2040 	prop->dc_power_default = DC_POWER_DEFAULT;
2041 
2042 	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2043 	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2044 	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2045 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2046 
2047 	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2048 
2049 	prop->mme_master_slave_mode = 1;
2050 
2051 	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2052 					(num_sync_stream_queues * HL_RSVD_SOBS);
2053 
2054 	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2055 					(num_sync_stream_queues * HL_RSVD_MONS);
2056 
2057 	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2058 
2059 	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2060 
2061 	prop->fw_cpu_boot_dev_sts0_valid = false;
2062 	prop->fw_cpu_boot_dev_sts1_valid = false;
2063 	prop->hard_reset_done_by_fw = false;
2064 	prop->gic_interrupts_enable = true;
2065 
2066 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2067 
2068 	prop->max_dec = NUMBER_OF_DEC;
2069 
2070 	prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2071 
2072 	prop->dma_mask = 64;
2073 
2074 	return 0;
2075 }
2076 
2077 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2078 {
2079 	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2080 	bool is_wc[3] = {false, false, true};
2081 	int rc;
2082 
2083 	rc = hl_pci_bars_map(hdev, name, is_wc);
2084 	if (rc)
2085 		return rc;
2086 
2087 	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2088 
2089 	return 0;
2090 }
2091 
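/*
 * Re-point the DRAM BAR (inbound PCI region 2) to the given device address and
 * return the previous BAR base, or U64_MAX if the iATU is owned by the FW or
 * setting the region failed.
 */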
2092 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2093 {
2094 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2095 	struct hl_inbound_pci_region pci_region;
2096 	u64 old_addr = addr;
2097 	int rc;
2098 
2099 	if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2100 		return old_addr;
2101 
2102 	if (hdev->asic_prop.iatu_done_by_fw)
2103 		return U64_MAX;
2104 
2105 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2106 	pci_region.mode = PCI_BAR_MATCH_MODE;
2107 	pci_region.bar = DRAM_BAR_ID;
2108 	pci_region.addr = addr;
2109 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2110 	if (rc)
2111 		return U64_MAX;
2112 
2113 	if (gaudi2) {
2114 		old_addr = gaudi2->dram_bar_cur_addr;
2115 		gaudi2->dram_bar_cur_addr = addr;
2116 	}
2117 
2118 	return old_addr;
2119 }
2120 
2121 static int gaudi2_init_iatu(struct hl_device *hdev)
2122 {
2123 	struct hl_inbound_pci_region inbound_region;
2124 	struct hl_outbound_pci_region outbound_region;
2125 	u32 bar_addr_low, bar_addr_high;
2126 	int rc;
2127 
2128 	if (hdev->asic_prop.iatu_done_by_fw)
2129 		return 0;
2130 
2131 	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
2132 	 * We must map this region in BAR match mode in order to
2133 	 * fetch BAR physical base address
2134 	 */
2135 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2136 	inbound_region.bar = SRAM_CFG_BAR_ID;
2137 	/* Base address must be aligned to Bar size which is 256 MB */
2138 	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2139 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2140 	if (rc)
2141 		return rc;
2142 
2143 	/* Fetch physical BAR address */
2144 	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2145 	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2146 
2147 	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2148 
2149 	/* Inbound Region 0 - Bar 0 - Point to CFG */
2150 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2151 	inbound_region.bar = SRAM_CFG_BAR_ID;
2152 	inbound_region.offset_in_bar = 0;
2153 	inbound_region.addr = STM_FLASH_BASE_ADDR;
2154 	inbound_region.size = CFG_REGION_SIZE;
2155 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2156 	if (rc)
2157 		return rc;
2158 
2159 	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2160 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2161 	inbound_region.bar = SRAM_CFG_BAR_ID;
2162 	inbound_region.offset_in_bar = CFG_REGION_SIZE;
2163 	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2164 	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2165 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2166 	if (rc)
2167 		return rc;
2168 
2169 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2170 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2171 	inbound_region.bar = DRAM_BAR_ID;
2172 	inbound_region.addr = DRAM_PHYS_BASE;
2173 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2174 	if (rc)
2175 		return rc;
2176 
2177 	/* Outbound Region 0 - Point to Host */
2178 	outbound_region.addr = HOST_PHYS_BASE_0;
2179 	outbound_region.size = HOST_PHYS_SIZE_0;
2180 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2181 
2182 	return rc;
2183 }
2184 
2185 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2186 {
2187 	return RREG32(mmHW_STATE);
2188 }
2189 
2190 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2191 {
2192 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2193 
2194 	/*
2195 	 * check for error condition in which number of binning candidates
2196 	 * is higher than the maximum supported by the driver
2197 	 */
2198 	if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2199 		dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2200 					MAX_CLUSTER_BINNING_FAULTY_TPCS,
2201 					hdev->tpc_binning);
2202 		return -EINVAL;
2203 	}
2204 
2205 	prop->tpc_binning_mask = hdev->tpc_binning;
2206 	prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2207 
2208 	return 0;
2209 }
2210 
2211 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2212 {
2213 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2214 	struct hw_queue_properties *q_props = prop->hw_queues_props;
2215 	u64 tpc_binning_mask;
2216 	u8 subst_idx = 0;
2217 	int i, rc;
2218 
2219 	rc = gaudi2_tpc_binning_init_prop(hdev);
2220 	if (rc)
2221 		return rc;
2222 
2223 	tpc_binning_mask = prop->tpc_binning_mask;
2224 
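	/*
	 * Each binned TPC is substituted by DCORE0_TPC6 (and then DCORE3_TPC5),
	 * so the substitute's own ID is cleared from the enabled mask and its
	 * queues are marked as binned.
	 */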
2225 	for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2226 		u8 subst_seq, binned, qid_base;
2227 
2228 		if (tpc_binning_mask == 0)
2229 			break;
2230 
2231 		if (subst_idx == 0) {
2232 			subst_seq = TPC_ID_DCORE0_TPC6;
2233 			qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2234 		} else {
2235 			subst_seq = TPC_ID_DCORE3_TPC5;
2236 			qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2237 		}
2238 
2240 		/* clear bit from mask */
2241 		binned = __ffs(tpc_binning_mask);
2242 		/*
2243 		 * Coverity complains about possible out-of-bound access in
2244 		 * clear_bit
2245 		 */
2246 		if (binned >= TPC_ID_SIZE) {
2247 			dev_err(hdev->dev,
2248 				"Invalid binned TPC (binning mask: %llx)\n",
2249 				tpc_binning_mask);
2250 			return -EINVAL;
2251 		}
2252 		clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2253 
2254 		/* also clear replacing TPC bit from enabled mask */
2255 		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2256 
2257 		/* bin the substitute TPC's queues */
2258 		q_props[qid_base].binned = 1;
2259 		q_props[qid_base + 1].binned = 1;
2260 		q_props[qid_base + 2].binned = 1;
2261 		q_props[qid_base + 3].binned = 1;
2262 
2263 		subst_idx++;
2264 	}
2265 
2266 	return 0;
2267 }
2268 
2269 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2270 {
2271 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2272 	u8 num_faulty;
2273 
2274 	num_faulty = hweight32(hdev->decoder_binning);
2275 
2276 	/*
2277 	 * check for error condition in which number of binning candidates
2278 	 * is higher than the maximum supported by the driver
2279 	 */
2280 	if (num_faulty > MAX_FAULTY_DECODERS) {
2281 		dev_err(hdev->dev, "decoder binning supports at most a single faulty decoder, provided mask 0x%x\n",
2282 						hdev->decoder_binning);
2283 		return -EINVAL;
2284 	}
2285 
2286 	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2287 
2288 	if (prop->decoder_binning_mask)
2289 		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2290 	else
2291 		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2292 
2293 	return 0;
2294 }
2295 
2296 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2297 {
2298 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2299 
2300 	/* check if we should override default binning */
2301 	if (!hdev->dram_binning) {
2302 		prop->dram_binning_mask = 0;
2303 		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2304 		return;
2305 	}
2306 
2307 	/* set DRAM binning constraints */
2308 	prop->faulty_dram_cluster_map |= hdev->dram_binning;
2309 	prop->dram_binning_mask = hdev->dram_binning;
2310 	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2311 }
2312 
2313 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2314 {
2315 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2316 	struct hw_queue_properties *q_props;
2317 	u8 seq, num_faulty;
2318 
2319 	num_faulty = hweight32(hdev->edma_binning);
2320 
2321 	/*
2322 	 * check for error condition in which number of binning candidates
2323 	 * is higher than the maximum supported by the driver
2324 	 */
2325 	if (num_faulty > MAX_FAULTY_EDMAS) {
2326 		dev_err(hdev->dev,
2327 			"EDMA binning supports at most a single faulty EDMA, provided mask 0x%x\n",
2328 			hdev->edma_binning);
2329 		return -EINVAL;
2330 	}
2331 
2332 	if (!hdev->edma_binning) {
2333 		prop->edma_binning_mask = 0;
2334 		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2335 		return 0;
2336 	}
2337 
2338 	seq = __ffs((unsigned long)hdev->edma_binning);
2339 
2340 	/* set binning constraints */
2341 	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2342 	prop->edma_binning_mask = hdev->edma_binning;
2343 	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2344 
2345 	/* bin substitute EDMA's queue */
2346 	q_props = prop->hw_queues_props;
2347 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2348 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2349 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2350 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2351 
2352 	return 0;
2353 }
2354 
2355 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2356 {
2357 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2358 	u8 num_faulty, seq;
2359 
2360 	/* check if we should override default binning */
2361 	if (!xbar_edge_iso_mask) {
2362 		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2363 		return 0;
2364 	}
2365 
2366 	/*
2367 	 * Note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2368 	 * only the FW can set a redundancy value). For the user it will always be 0.
2369 	 */
2370 	num_faulty = hweight32(xbar_edge_iso_mask);
2371 
2372 	/*
2373 	 * check for error condition in which number of binning candidates
2374 	 * is higher than the maximum supported by the driver
2375 	 */
2376 	if (num_faulty > MAX_FAULTY_XBARS) {
2377 		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2378 									MAX_FAULTY_XBARS);
2379 		return -EINVAL;
2380 	}
2381 
2382 	seq = __ffs((unsigned long)xbar_edge_iso_mask);
2383 
2384 	/* set binning constraints */
2385 	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2386 	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2387 
2388 	return 0;
2389 }
2390 
2391 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2392 {
2393 	int rc;
2394 
2395 	/*
2396 	 * Mark all clusters as good; each component will "fail" a cluster
2397 	 * based on eFuse/user values.
2398 	 * If more than a single cluster is faulty, the chip is unusable.
2399 	 */
2400 	hdev->asic_prop.faulty_dram_cluster_map = 0;
2401 
2402 	gaudi2_set_dram_binning_masks(hdev);
2403 
2404 	rc = gaudi2_set_edma_binning_masks(hdev);
2405 	if (rc)
2406 		return rc;
2407 
2408 	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2409 	if (rc)
2410 		return rc;
2411 
2413 	/* always initially set to full mask */
2414 	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2415 
2416 	return 0;
2417 }
2418 
2419 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2420 {
2421 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2422 	int rc;
2423 
2424 	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2425 	if (rc)
2426 		return rc;
2427 
2428 	/* if DRAM binning is reported by the FW, we should perform cluster config */
2429 	if (prop->faulty_dram_cluster_map) {
2430 		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2431 
2432 		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2433 	}
2434 
2435 	return 0;
2436 }
2437 
2438 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2439 {
2440 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2441 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2442 	long max_power;
2443 	u64 dram_size;
2444 	int rc;
2445 
2446 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2447 		return 0;
2448 
2449 	/* No point in asking for this information again when not doing a hard reset, as the
2450 	 * device CPU hasn't been reset
2451 	 */
2452 	if (hdev->reset_info.in_compute_reset)
2453 		return 0;
2454 
2455 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2456 										mmCPU_BOOT_ERR1);
2457 	if (rc)
2458 		return rc;
2459 
2460 	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2461 	if (dram_size) {
2462 		/* we can have either 5 or 6 HBMs. other values are invalid */
2463 
2464 		if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2465 					(dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2466 			dev_err(hdev->dev,
2467 				"F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2468 				dram_size, prop->dram_size);
2469 			dram_size = prop->dram_size;
2470 		}
2471 
2472 		prop->dram_size = dram_size;
2473 		prop->dram_end_address = prop->dram_base_address + dram_size;
2474 	}
2475 
2476 	if (!strlen(prop->cpucp_info.card_name))
2477 		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2478 
2479 	/* Overwrite binning masks with the actual binning values from F/W */
2480 	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2481 	hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2482 	hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2483 	hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2484 
2485 	/*
2486 	 * at this point the DRAM parameters need to be updated according to data obtained
2487 	 * from the FW
2488 	 */
2489 	rc = hdev->asic_funcs->set_dram_properties(hdev);
2490 	if (rc)
2491 		return rc;
2492 
2493 	rc = gaudi2_set_cluster_binning_masks(hdev);
2494 	if (rc)
2495 		return rc;
2496 
2497 	rc = gaudi2_set_tpc_binning_masks(hdev);
2498 	if (rc)
2499 		return rc;
2500 
2501 	rc = gaudi2_set_dec_binning_masks(hdev);
2502 	if (rc)
2503 		return rc;
2504 
2505 	max_power = hl_fw_get_max_power(hdev);
2506 	if (max_power < 0)
2507 		return max_power;
2508 
2509 	prop->max_power_default = (u64) max_power;
2510 
2511 	return 0;
2512 }
2513 
2514 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2515 {
2516 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2517 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2518 	int rc;
2519 
2520 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2521 		return 0;
2522 
2523 	rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2524 	if (rc)
2525 		return rc;
2526 
2527 	hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2528 
2529 	return 0;
2530 }
2531 
2532 static int gaudi2_early_init(struct hl_device *hdev)
2533 {
2534 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2535 	struct pci_dev *pdev = hdev->pdev;
2536 	resource_size_t pci_bar_size;
2537 	int rc;
2538 
2539 	rc = gaudi2_set_fixed_properties(hdev);
2540 	if (rc)
2541 		return rc;
2542 
2543 	/* Check BAR sizes */
2544 	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2545 
2546 	if (pci_bar_size != CFG_BAR_SIZE) {
2547 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2548 			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2549 		rc = -ENODEV;
2550 		goto free_queue_props;
2551 	}
2552 
2553 	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2554 	if (pci_bar_size != MSIX_BAR_SIZE) {
2555 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2556 			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2557 		rc = -ENODEV;
2558 		goto free_queue_props;
2559 	}
2560 
2561 	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2562 	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2563 
2564 	/*
2565 	 * The iATU is configured by the driver only in pldm; otherwise it is done by the FW
2566 	 */
2567 	if (hdev->pldm)
2568 		hdev->asic_prop.iatu_done_by_fw = false;
2569 	else
2570 		hdev->asic_prop.iatu_done_by_fw = true;
2571 
2572 	rc = hl_pci_init(hdev);
2573 	if (rc)
2574 		goto free_queue_props;
2575 
2576 	/* Before continuing in the initialization, we need to read the preboot
2577 	 * version to determine whether we run with a security-enabled firmware
2578 	 */
2579 	rc = hl_fw_read_preboot_status(hdev);
2580 	if (rc) {
2581 		if (hdev->reset_on_preboot_fail)
2582 			hdev->asic_funcs->hw_fini(hdev, true, false);
2583 		goto pci_fini;
2584 	}
2585 
2586 	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2587 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2588 		hdev->asic_funcs->hw_fini(hdev, true, false);
2589 	}
2590 
2591 	return 0;
2592 
2593 pci_fini:
2594 	hl_pci_fini(hdev);
2595 free_queue_props:
2596 	kfree(hdev->asic_prop.hw_queues_props);
2597 	return rc;
2598 }
2599 
2600 static int gaudi2_early_fini(struct hl_device *hdev)
2601 {
2602 	kfree(hdev->asic_prop.hw_queues_props);
2603 	hl_pci_fini(hdev);
2604 
2605 	return 0;
2606 }
2607 
2608 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
2609 {
2610 	switch (arc_id) {
2611 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
2612 		return true;
2613 	default:
2614 		return false;
2615 	}
2616 }
2617 
2618 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
2619 {
2620 	switch (arc_id) {
2621 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
2622 		return true;
2623 	default:
2624 		return false;
2625 	}
2626 }
2627 
2628 static void gaudi2_init_arcs(struct hl_device *hdev)
2629 {
2630 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2631 	u64 arc_id;
2632 	u32 i;
2633 
2634 	for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
2635 		if (gaudi2_is_arc_enabled(hdev, i))
2636 			continue;
2637 
2638 		gaudi2_set_arc_id_cap(hdev, i);
2639 	}
2640 
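	/* Queue IDs advance in groups of 4 (one group per QMAN), so each
	 * iteration below inspects the ARC of a single QMAN
	 */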
2641 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
2642 		if (!gaudi2_is_queue_enabled(hdev, i))
2643 			continue;
2644 
2645 		arc_id = gaudi2_queue_id_to_arc_id[i];
2646 		if (gaudi2_is_arc_enabled(hdev, arc_id))
2647 			continue;
2648 
2649 		if (gaudi2_is_arc_nic_owned(arc_id) &&
2650 				!(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
2651 			continue;
2652 
2653 		if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
2654 							BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
2655 			continue;
2656 
2657 		gaudi2_set_arc_id_cap(hdev, arc_id);
2658 	}
2659 }
2660 
2661 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
2662 {
2663 	u32 reg_base, reg_val;
2664 	int rc;
2665 
2666 	switch (cpu_id) {
2667 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
2668 		/* Each ARC scheduler has 2 consecutive DCCM blocks */
2669 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2670 						ARC_DCCM_BLOCK_SIZE * 2, true);
2671 		if (rc)
2672 			return rc;
2673 		break;
2674 	case CPU_ID_SCHED_ARC4:
2675 	case CPU_ID_SCHED_ARC5:
2676 	case CPU_ID_MME_QMAN_ARC0:
2677 	case CPU_ID_MME_QMAN_ARC1:
2678 		reg_base = gaudi2_arc_blocks_bases[cpu_id];
2679 
2680 		/* Scrub lower DCCM block */
2681 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2682 						ARC_DCCM_BLOCK_SIZE, true);
2683 		if (rc)
2684 			return rc;
2685 
2686 		/* Switch to upper DCCM block */
2687 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
2688 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2689 
2690 		/* Scrub upper DCCM block */
2691 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2692 						ARC_DCCM_BLOCK_SIZE, true);
2693 		if (rc)
2694 			return rc;
2695 
2696 		/* Switch to lower DCCM block */
2697 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
2698 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2699 		break;
2700 	default:
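		/* All other ARCs own a single DCCM block */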
2701 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2702 						ARC_DCCM_BLOCK_SIZE, true);
2703 		if (rc)
2704 			return rc;
2705 	}
2706 
2707 	return 0;
2708 }
2709 
2710 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
2711 {
2712 	u16 arc_id;
2713 
2714 	for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
2715 		if (!gaudi2_is_arc_enabled(hdev, arc_id))
2716 			continue;
2717 
2718 		gaudi2_scrub_arc_dccm(hdev, arc_id);
2719 	}
2720 }
2721 
2722 static int gaudi2_late_init(struct hl_device *hdev)
2723 {
2724 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2725 	int rc;
2726 
2727 	hdev->asic_prop.supports_advanced_cpucp_rc = true;
2728 
2729 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
2730 					gaudi2->virt_msix_db_dma_addr);
2731 	if (rc) {
2732 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
2733 		return rc;
2734 	}
2735 
2736 	rc = gaudi2_fetch_psoc_frequency(hdev);
2737 	if (rc) {
2738 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
2739 		goto disable_pci_access;
2740 	}
2741 
2742 	gaudi2_init_arcs(hdev);
2743 	gaudi2_scrub_arcs_dccm(hdev);
2744 	gaudi2_init_security(hdev);
2745 
2746 	return 0;
2747 
2748 disable_pci_access:
2749 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2750 
2751 	return rc;
2752 }
2753 
2754 static void gaudi2_late_fini(struct hl_device *hdev)
2755 {
2756 	hl_hwmon_release_resources(hdev);
2757 }
2758 
2759 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
2760 {
2761 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2762 
2763 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2764 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2765 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2766 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2767 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2768 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2769 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2770 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2771 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2772 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2773 }
2774 
2775 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
2776 {
2777 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2778 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2779 	u32 block_size, umr_start_idx, num_umr_blocks;
2780 	int i;
2781 
2782 	for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
2783 		if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
2784 			block_size = ARC_DCCM_BLOCK_SIZE * 2;
2785 		else
2786 			block_size = ARC_DCCM_BLOCK_SIZE;
2787 
2788 		blocks[i].address = gaudi2_arc_dccm_bases[i];
2789 		blocks[i].size = block_size;
2790 	}
2791 
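	/* The ARC-farm scheduler ARCs and the MME QMAN ARCs also expose their ACP engine blocks */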
2792 	blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
2793 	blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
2794 
2795 	blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
2796 	blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
2797 
2798 	blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
2799 	blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
2800 
2801 	blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
2802 	blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
2803 
2804 	blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
2805 	blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
2806 
2807 	blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
2808 	blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
2809 
2810 	blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
2811 	blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
2812 
2813 	blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
2814 	blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
2815 
2816 	umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
2817 	num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
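	/* Each NIC engine exposes NUM_OF_USER_NIC_UMR_BLOCKS unsecured doorbell (UMR) blocks */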
2818 	for (i = 0 ; i < num_umr_blocks ; i++) {
2819 		u8 nic_id, umr_block_id;
2820 
2821 		nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
2822 		umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
2823 
2824 		blocks[umr_start_idx + i].address =
2825 			mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
2826 			(nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
2827 			(nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
2828 			umr_block_id * NIC_UMR_OFFSET;
2829 		blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
2830 	}
2831 
2832 	/* Expose decoder HW configuration block to user */
2833 	gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
2834 
2835 	for (i = 1; i < NUM_OF_DCORES; ++i) {
2836 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
2837 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
2838 
2839 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
2840 						mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
2841 
2842 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
2843 						mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
2844 	}
2845 }
2846 
2847 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
2848 {
2849 	dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
2850 	void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
2851 	int i, j, rc = 0;
2852 
2853 	/* The device ARC works with 32-bit addresses, and because there is a single HW register
2854 	 * that holds the extension bits (49..28), these bits must be identical across the entire
2855 	 * allocated range.
2856 	 */
2857 
2858 	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
2859 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
2860 							&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
2861 		if (!virt_addr_arr[i]) {
2862 			rc = -ENOMEM;
2863 			goto free_dma_mem_arr;
2864 		}
2865 
2866 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
2867 		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
2868 			break;
2869 	}
2870 
2871 	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
2872 		dev_err(hdev->dev,
2873 			"MSB bits of ARC-accessible DMA memory are not identical across the entire range\n");
2874 		rc = -EFAULT;
2875 		goto free_dma_mem_arr;
2876 	}
2877 
2878 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
2879 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
2880 
2881 free_dma_mem_arr:
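	/* Free only the allocations that will not be used; the chosen allocation
	 * at index i (if any) is kept
	 */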
2882 	for (j = 0 ; j < i ; j++)
2883 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
2884 						dma_addr_arr[j]);
2885 
2886 	return rc;
2887 }
2888 
2889 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
2890 {
2891 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2892 	struct pci_mem_region *region;
2893 
2894 	/* CFG */
2895 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
2896 	region->region_base = CFG_BASE;
2897 	region->region_size = CFG_SIZE;
2898 	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
2899 	region->bar_size = CFG_BAR_SIZE;
2900 	region->bar_id = SRAM_CFG_BAR_ID;
2901 	region->used = 1;
2902 
2903 	/* SRAM */
2904 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
2905 	region->region_base = SRAM_BASE_ADDR;
2906 	region->region_size = SRAM_SIZE;
2907 	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
2908 	region->bar_size = CFG_BAR_SIZE;
2909 	region->bar_id = SRAM_CFG_BAR_ID;
2910 	region->used = 1;
2911 
2912 	/* DRAM */
2913 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
2914 	region->region_base = DRAM_PHYS_BASE;
2915 	region->region_size = hdev->asic_prop.dram_size;
2916 	region->offset_in_bar = 0;
2917 	region->bar_size = prop->dram_pci_bar_size;
2918 	region->bar_id = DRAM_BAR_ID;
2919 	region->used = 1;
2920 }
2921 
2922 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
2923 {
2924 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2925 	int i, j, k;
2926 
2927 	/* Initialize common user CQ interrupt */
2928 	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
2929 				HL_COMMON_USER_CQ_INTERRUPT_ID, false);
2930 
2931 	/* Initialize common decoder interrupt */
2932 	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
2933 				HL_COMMON_DEC_INTERRUPT_ID, true);
2934 
2935 	/* User interrupts structure holds both decoder and user interrupts from various engines.
2936 	 * We first initialize the decoder interrupts and then we add the user interrupts.
2937 	 * The only limitation is that the last decoder interrupt id must be smaller
2938 	 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
2939 	 */
2940 
2941 	/* Initialize decoder interrupts; expose only the normal interrupts,
2942 	 * as the error (abnormal) interrupts are handled by the driver
2943 	 */
2944 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
2945 										i += 2, j++)
2946 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, true);
2947 
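	/* User CQ interrupts are appended to the same array right after the
	 * decoder entries, hence j keeps counting
	 */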
2948 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
2949 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, false);
2950 }
2951 
2952 static inline int gaudi2_get_non_zero_random_int(void)
2953 {
2954 	int rand = get_random_u32();
2955 
2956 	return rand ? rand : 1;
2957 }
2958 
2959 static int gaudi2_sw_init(struct hl_device *hdev)
2960 {
2961 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2962 	struct gaudi2_device *gaudi2;
2963 	int i, rc;
2964 
2965 	/* Allocate device structure */
2966 	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
2967 	if (!gaudi2)
2968 		return -ENOMEM;
2969 
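	/* Collect the F/W event IDs of all valid, non-message entries of the IRQ map table */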
2970 	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
2971 		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
2972 			continue;
2973 
2974 		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
2975 			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
2976 				GAUDI2_EVENT_SIZE);
2977 			rc = -EINVAL;
2978 			goto free_gaudi2_device;
2979 		}
2980 
2981 		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
2982 	}
2983 
2984 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
2985 		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
2986 
2987 	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
2988 
2989 	hdev->asic_specific = gaudi2;
2990 
2991 	/* Create DMA pool for small allocations.
2992 	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
2993 	 * PI/CI registers allocated from this pool have this restriction
2994 	 */
2995 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
2996 					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
2997 	if (!hdev->dma_pool) {
2998 		dev_err(hdev->dev, "failed to create DMA pool\n");
2999 		rc = -ENOMEM;
3000 		goto free_gaudi2_device;
3001 	}
3002 
3003 	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3004 	if (rc)
3005 		goto free_dma_pool;
3006 
3007 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3008 	if (!hdev->cpu_accessible_dma_pool) {
3009 		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3010 		rc = -ENOMEM;
3011 		goto free_cpu_dma_mem;
3012 	}
3013 
3014 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3015 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3016 	if (rc) {
3017 		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3018 		rc = -EFAULT;
3019 		goto free_cpu_accessible_dma_pool;
3020 	}
3021 
3022 	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3023 								&gaudi2->virt_msix_db_dma_addr);
3024 	if (!gaudi2->virt_msix_db_cpu_addr) {
3025 		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3026 		rc = -ENOMEM;
3027 		goto free_cpu_accessible_dma_pool;
3028 	}
3029 
3030 	spin_lock_init(&gaudi2->hw_queues_lock);
3031 
3032 	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3033 							&gaudi2->scratchpad_bus_address,
3034 							GFP_KERNEL | __GFP_ZERO);
3035 	if (!gaudi2->scratchpad_kernel_address) {
3036 		rc = -ENOMEM;
3037 		goto free_virt_msix_db_mem;
3038 	}
3039 
3040 	gaudi2_user_mapped_blocks_init(hdev);
3041 
3042 	/* Initialize user interrupts */
3043 	gaudi2_user_interrupt_setup(hdev);
3044 
3045 	hdev->supports_coresight = true;
3046 	hdev->supports_sync_stream = true;
3047 	hdev->supports_cb_mapping = true;
3048 	hdev->supports_wait_for_multi_cs = false;
3049 
3050 	prop->supports_compute_reset = true;
3051 
3052 	hdev->asic_funcs->set_pci_memory_regions(hdev);
3053 
3054 	return 0;
3055 
3056 free_virt_msix_db_mem:
3057 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3058 free_cpu_accessible_dma_pool:
3059 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3060 free_cpu_dma_mem:
3061 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3062 					hdev->cpu_accessible_dma_address);
3063 free_dma_pool:
3064 	dma_pool_destroy(hdev->dma_pool);
3065 free_gaudi2_device:
3066 	kfree(gaudi2);
3067 	return rc;
3068 }
3069 
3070 static int gaudi2_sw_fini(struct hl_device *hdev)
3071 {
3072 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3073 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3074 
3075 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3076 
3077 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3078 
3079 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3080 						hdev->cpu_accessible_dma_address);
3081 
3082 	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3083 					gaudi2->scratchpad_bus_address);
3084 
3085 	dma_pool_destroy(hdev->dma_pool);
3086 
3087 	kfree(gaudi2);
3088 
3089 	return 0;
3090 }
3091 
3092 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3093 {
3094 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3095 						QM_GLBL_CFG1_CQF_STOP |
3096 						QM_GLBL_CFG1_CP_STOP);
3097 
3098 	/* stop also the ARC */
3099 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3100 }
3101 
3102 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3103 {
3104 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3105 						QM_GLBL_CFG1_CQF_FLUSH |
3106 						QM_GLBL_CFG1_CP_FLUSH);
3107 }
3108 
3109 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3110 {
3111 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3112 }
3113 
3114 /**
3115  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3116  *
3117  * @hdev: pointer to the habanalabs device structure
3118  * @queue_id: queue whose fence counters should be cleared
3119  * @skip_fence: if true, set all fence counters to the maximum value to avoid
3120  *              getting stuck on any fence value. otherwise set all fence
3121  *              counters to 0 (standard clear of fence counters)
3122  */
3123 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3124 						bool skip_fence)
3125 {
3126 	u32 size, reg_base;
3127 	u32 addr, val;
3128 
3129 	reg_base = gaudi2_qm_blocks_bases[queue_id];
3130 
3131 	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3132 	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
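	/* This range presumably covers the FENCE0-3 counters of all CPs, which
	 * sit contiguously below the BARRIER_CFG register
	 */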
3133 
3134 	/*
3135 	 * in case we want to make sure that a QM that is stuck on a fence will
3136 	 * be released, we should set the fence counter to a value higher than
3137 	 * the one the QM is waiting for. to cover a fence counter of any value,
3138 	 * we set all counters to the maximum fence value
3139 	 */
3140 	val = skip_fence ? U32_MAX : 0;
3141 	gaudi2_memset_device_lbw(hdev, addr, size, val);
3142 }
3143 
3144 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3145 {
3146 	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3147 
3148 	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3149 	gaudi2_flush_qman_common(hdev, reg_base);
3150 	gaudi2_flush_qman_arc_common(hdev, reg_base);
3151 }
3152 
3153 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3154 {
3155 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3156 	int dcore, inst;
3157 
3158 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3159 		goto stop_edma_qmans;
3160 
3161 	/* Stop CPs of PDMA QMANs */
3162 	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3163 	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3164 
3165 stop_edma_qmans:
3166 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3167 		return;
3168 
3169 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3170 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3171 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3172 			u32 qm_base;
3173 
3174 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3175 				continue;
3176 
3177 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3178 					inst * DCORE_EDMA_OFFSET;
3179 
3180 			/* Stop CPs of EDMA QMANs */
3181 			gaudi2_stop_qman_common(hdev, qm_base);
3182 		}
3183 	}
3184 }
3185 
3186 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3187 {
3188 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3189 	u32 offset, i;
3190 
3191 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3192 
3193 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3194 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3195 			continue;
3196 
3197 		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3198 	}
3199 }
3200 
3201 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3202 {
3203 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3204 	u32 reg_base;
3205 	int i;
3206 
3207 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3208 		return;
3209 
3210 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3211 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3212 			continue;
3213 
3214 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3215 		gaudi2_stop_qman_common(hdev, reg_base);
3216 	}
3217 }
3218 
3219 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3220 {
3221 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3222 	u32 reg_base;
3223 	int i;
3224 
3225 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3226 		return;
3227 
3228 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3229 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3230 			continue;
3231 
3232 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3233 		gaudi2_stop_qman_common(hdev, reg_base);
3234 	}
3235 }
3236 
3237 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3238 {
3239 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3240 	u32 reg_base, queue_id;
3241 	int i;
3242 
3243 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3244 		return;
3245 
3246 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3247 
3248 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3249 		if (!(hdev->nic_ports_mask & BIT(i)))
3250 			continue;
3251 
3252 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3253 		gaudi2_stop_qman_common(hdev, reg_base);
3254 	}
3255 }
3256 
3257 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3258 {
3259 	u32 reg_val;
3260 
3261 	reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3262 	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3263 }
3264 
3265 static void gaudi2_dma_stall(struct hl_device *hdev)
3266 {
3267 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3268 	int dcore, inst;
3269 
3270 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3271 		goto stall_edma;
3272 
3273 	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3274 	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3275 
3276 stall_edma:
3277 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3278 		return;
3279 
3280 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3281 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3282 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3283 			u32 core_base;
3284 
3285 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3286 				continue;
3287 
3288 			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3289 					inst * DCORE_EDMA_OFFSET;
3290 
3291 			/* Stall EDMA cores */
3292 			gaudi2_stall_dma_common(hdev, core_base);
3293 		}
3294 	}
3295 }
3296 
3297 static void gaudi2_mme_stall(struct hl_device *hdev)
3298 {
3299 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3300 	u32 offset, i;
3301 
3302 	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3303 
3304 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3305 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3306 			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3307 }
3308 
3309 static void gaudi2_tpc_stall(struct hl_device *hdev)
3310 {
3311 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3312 	u32 reg_base;
3313 	int i;
3314 
3315 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3316 		return;
3317 
3318 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3319 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3320 			continue;
3321 
3322 		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3323 		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3324 	}
3325 }
3326 
3327 static void gaudi2_rotator_stall(struct hl_device *hdev)
3328 {
3329 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3330 	u32 reg_val;
3331 	int i;
3332 
3333 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3334 		return;
3335 
3336 	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3337 			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3338 			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3339 
3340 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3341 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3342 			continue;
3343 
3344 		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3345 	}
3346 }
3347 
3348 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3349 {
3350 	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3351 }
3352 
3353 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3354 {
3355 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3356 	int dcore, inst;
3357 
3358 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3359 		goto stop_edma_qmans;
3360 
3361 	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3362 	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3363 
3364 stop_edma_qmans:
3365 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3366 		return;
3367 
3368 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3369 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3370 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3371 			u32 qm_base;
3372 
3373 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3374 				continue;
3375 
3376 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3377 					inst * DCORE_EDMA_OFFSET;
3378 
3379 			/* Disable CPs of EDMA QMANs */
3380 			gaudi2_disable_qman_common(hdev, qm_base);
3381 		}
3382 	}
3383 }
3384 
3385 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3386 {
3387 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3388 	u32 offset, i;
3389 
3390 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3391 
3392 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3393 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3394 			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3395 }
3396 
3397 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3398 {
3399 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3400 	u32 reg_base;
3401 	int i;
3402 
3403 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3404 		return;
3405 
3406 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3407 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3408 			continue;
3409 
3410 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3411 		gaudi2_disable_qman_common(hdev, reg_base);
3412 	}
3413 }
3414 
3415 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
3416 {
3417 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3418 	u32 reg_base;
3419 	int i;
3420 
3421 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3422 		return;
3423 
3424 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3425 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3426 			continue;
3427 
3428 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3429 		gaudi2_disable_qman_common(hdev, reg_base);
3430 	}
3431 }
3432 
3433 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
3434 {
3435 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3436 	u32 reg_base, queue_id;
3437 	int i;
3438 
3439 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3440 		return;
3441 
3442 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3443 
3444 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3445 		if (!(hdev->nic_ports_mask & BIT(i)))
3446 			continue;
3447 
3448 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3449 		gaudi2_disable_qman_common(hdev, reg_base);
3450 	}
3451 }
3452 
3453 static void gaudi2_enable_timestamp(struct hl_device *hdev)
3454 {
3455 	/* Disable the timestamp counter */
3456 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3457 
3458 	/* Zero the lower/upper parts of the 64-bit counter */
3459 	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
3460 	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
3461 
3462 	/* Enable the counter */
3463 	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
3464 }
3465 
3466 static void gaudi2_disable_timestamp(struct hl_device *hdev)
3467 {
3468 	/* Disable the timestamp counter */
3469 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3470 }
3471 
3472 static const char *gaudi2_irq_name(u16 irq_number)
3473 {
3474 	switch (irq_number) {
3475 	case GAUDI2_IRQ_NUM_EVENT_QUEUE:
3476 		return "gaudi2 cpu eq";
3477 	case GAUDI2_IRQ_NUM_COMPLETION:
3478 		return "gaudi2 completion";
3479 	case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
3480 		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
3481 	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
3482 		return "gaudi2 user completion";
3483 	default:
3484 		return "invalid";
3485 	}
3486 }
3487 
3488 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
3489 {
3490 	int i, irq, relative_idx;
3491 	struct hl_dec *dec;
3492 
3493 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
3494 		irq = pci_irq_vector(hdev->pdev, i);
3495 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3496 
3497 		dec = hdev->dec + relative_idx / 2;
3498 
3499 		/* We pass different structures depending on the irq handler. For the abnormal
3500 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3501 		 * user_interrupt entry
3502 		 */
3503 		free_irq(irq, ((relative_idx % 2) ?
3504 				(void *) dec :
3505 				(void *) &hdev->user_interrupt[dec->core_id]));
3506 	}
3507 }
3508 
3509 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
3510 {
3511 	int rc, i, irq_init_cnt, irq, relative_idx;
3512 	irq_handler_t irq_handler;
3513 	struct hl_dec *dec;
3514 
3515 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
3516 			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
3517 			i++, irq_init_cnt++) {
3518 
3519 		irq = pci_irq_vector(hdev->pdev, i);
3520 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3521 
3522 		irq_handler = (relative_idx % 2) ?
3523 				hl_irq_handler_dec_abnrm :
3524 				hl_irq_handler_user_interrupt;
3525 
3526 		dec = hdev->dec + relative_idx / 2;
3527 
3528 		/* We pass different structures depending on the irq handler. For the abnormal
3529 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3530 		 * user_interrupt entry
3531 		 */
3532 		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i),
3533 				((relative_idx % 2) ?
3534 				(void *) dec :
3535 				(void *) &hdev->user_interrupt[dec->core_id]));
3536 		if (rc) {
3537 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3538 			goto free_dec_irqs;
3539 		}
3540 	}
3541 
3542 	return 0;
3543 
3544 free_dec_irqs:
3545 	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
3546 	return rc;
3547 }
3548 
3549 static int gaudi2_enable_msix(struct hl_device *hdev)
3550 {
3551 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3552 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3553 	int rc, irq, i, j, user_irq_init_cnt;
3554 	irq_handler_t irq_handler;
3555 	struct hl_cq *cq;
3556 
3557 	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
3558 		return 0;
3559 
3560 	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
3561 					PCI_IRQ_MSIX);
3562 	if (rc < 0) {
3563 		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
3564 			GAUDI2_MSIX_ENTRIES, rc);
3565 		return rc;
3566 	}
3567 
3568 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3569 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3570 	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
3571 	if (rc) {
3572 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3573 		goto free_irq_vectors;
3574 	}
3575 
3576 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3577 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
3578 			&hdev->event_queue);
3579 	if (rc) {
3580 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3581 		goto free_completion_irq;
3582 	}
3583 
3584 	rc = gaudi2_dec_enable_msix(hdev);
3585 	if (rc) {
3586 		dev_err(hdev->dev, "Failed to enable decoder IRQ");
3587 		goto free_event_irq;
3588 	}
3589 
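	/* The first user_dec_intr_count entries of user_interrupt[] belong to the
	 * decoders, so the user CQ interrupts start at index j = user_dec_intr_count
	 */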
3590 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
3591 			user_irq_init_cnt < prop->user_interrupt_count;
3592 			i++, j++, user_irq_init_cnt++) {
3593 
3594 		irq = pci_irq_vector(hdev->pdev, i);
3595 		irq_handler = hl_irq_handler_user_interrupt;
3596 
3597 		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]);
3598 		if (rc) {
3599 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3600 			goto free_user_irq;
3601 		}
3602 	}
3603 
3604 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
3605 
3606 	return 0;
3607 
3608 free_user_irq:
3609 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
3610 			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
3611 
3612 		irq = pci_irq_vector(hdev->pdev, i);
3613 		free_irq(irq, &hdev->user_interrupt[j]);
3614 	}
3615 
3616 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3617 
3618 free_event_irq:
3619 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3620 	free_irq(irq, cq);
3621 
3622 free_completion_irq:
3623 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3624 	free_irq(irq, cq);
3625 
3626 free_irq_vectors:
3627 	pci_free_irq_vectors(hdev->pdev);
3628 
3629 	return rc;
3630 }
3631 
3632 static void gaudi2_sync_irqs(struct hl_device *hdev)
3633 {
3634 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3635 	int i, j;
3636 	int irq;
3637 
3638 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3639 		return;
3640 
3641 	/* Wait for all pending IRQs to be finished */
3642 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
3643 
3644 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
3645 		irq = pci_irq_vector(hdev->pdev, i);
3646 		synchronize_irq(irq);
3647 	}
3648 
3649 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
3650 										i++, j++) {
3651 		irq = pci_irq_vector(hdev->pdev, i);
3652 		synchronize_irq(irq);
3653 	}
3654 
3655 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
3656 }
3657 
3658 static void gaudi2_disable_msix(struct hl_device *hdev)
3659 {
3660 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3661 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3662 	struct hl_cq *cq;
3663 	int irq, i, j, k;
3664 
3665 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3666 		return;
3667 
3668 	gaudi2_sync_irqs(hdev);
3669 
3670 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3671 	free_irq(irq, &hdev->event_queue);
3672 
3673 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3674 
3675 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
3676 			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
3677 
3678 		irq = pci_irq_vector(hdev->pdev, i);
3679 		free_irq(irq, &hdev->user_interrupt[j]);
3680 	}
3681 
3682 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3683 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3684 	free_irq(irq, cq);
3685 
3686 	pci_free_irq_vectors(hdev->pdev);
3687 
3688 	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
3689 }
3690 
3691 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
3692 {
3693 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3694 	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3695 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3696 	int rc;
3697 
3698 	if (hdev->pldm)
3699 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3700 	else
3701 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3702 
3703 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3704 		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
3705 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3706 			continue;
3707 
3708 		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
3709 
3710 		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
3711 
3712 		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3713 
3714 		/* Wait till all traffic from decoder stops
3715 		 * before applying core reset.
3716 		 */
3717 		rc = hl_poll_timeout(
3718 				hdev,
3719 				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3720 				graceful,
3721 				(graceful & graceful_pend_mask),
3722 				100,
3723 				timeout_usec);
3724 		if (rc)
3725 			dev_err(hdev->dev,
3726 				"Failed to stop traffic from DCORE%d Decoder %d\n",
3727 				dcore_id, dec_id);
3728 	}
3729 }
3730 
3731 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
3732 {
3733 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3734 	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3735 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3736 	int rc;
3737 
3738 	if (hdev->pldm)
3739 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3740 	else
3741 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3742 
3743 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3744 		dec_bit = PCIE_DEC_SHIFT + dec_id;
3745 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3746 			continue;
3747 
3748 		offset = dec_id * PCIE_VDEC_OFFSET;
3749 
3750 		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
3751 
3752 		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3753 
3754 		/* Wait till all traffic from decoder stops
3755 		 * before applying core reset.
3756 		 */
3757 		rc = hl_poll_timeout(
3758 				hdev,
3759 				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3760 				graceful,
3761 				(graceful & graceful_pend_mask),
3762 				100,
3763 				timeout_usec);
3764 		if (rc)
3765 			dev_err(hdev->dev,
3766 				"Failed to stop traffic from PCIe Decoder %d\n",
3767 				dec_id);
3768 	}
3769 }
3770 
3771 static void gaudi2_stop_dec(struct hl_device *hdev)
3772 {
3773 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3774 	int dcore_id;
3775 
3776 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
3777 		return;
3778 
3779 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
3780 		gaudi2_stop_dcore_dec(hdev, dcore_id);
3781 
3782 	gaudi2_stop_pcie_dec(hdev);
3783 }
3784 
3785 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3786 {
3787 	u32 reg_base, reg_val;
3788 
3789 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
3790 	if (run_mode == HL_ENGINE_CORE_RUN)
3791 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
3792 	else
3793 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
3794 
3795 	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
3796 }
3797 
3798 static void gaudi2_halt_arcs(struct hl_device *hdev)
3799 {
3800 	u16 arc_id;
3801 
3802 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
3803 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3804 			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
3805 	}
3806 }
3807 
3808 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3809 {
3810 	int rc;
3811 	u32 reg_base, val, ack_mask, timeout_usec = 100000;
3812 
3813 	if (hdev->pldm)
3814 		timeout_usec *= 100;
3815 
3816 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
3817 	if (run_mode == HL_ENGINE_CORE_RUN)
3818 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
3819 	else
3820 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
3821 
3822 	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
3823 				val, ((val & ack_mask) == ack_mask),
3824 				1000, timeout_usec);
3825 
3826 	if (!rc) {
3827 		/* Clear the request bit now that the ACK was received */
3828 		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
3829 		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
3830 	}
3831 
3832 	return rc;
3833 }
3834 
3835 static void gaudi2_reset_arcs(struct hl_device *hdev)
3836 {
3837 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3838 	u16 arc_id;
3839 
3840 	if (!gaudi2)
3841 		return;
3842 
3843 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
3844 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3845 			gaudi2_clr_arc_id_cap(hdev, arc_id);
3846 }
3847 
3848 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
3849 {
3850 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3851 	u32 queue_id;
3852 	int i;
3853 
3854 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3855 		return;
3856 
3857 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3858 
3859 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3860 		if (!(hdev->nic_ports_mask & BIT(i)))
3861 			continue;
3862 
3863 		gaudi2_qman_manual_flush_common(hdev, queue_id);
3864 	}
3865 }
3866 
3867 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
3868 					u32 num_cores, u32 core_command)
3869 {
3870 	int i, rc;
3871 
3873 	for (i = 0 ; i < num_cores ; i++) {
3874 		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
3875 			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
3876 	}
3877 
3878 	for (i = 0 ; i < num_cores ; i++) {
3879 		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
3880 			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
3881 
3882 			if (rc) {
3883 				dev_err(hdev->dev, "failed to %s arc: %d\n",
3884 					(core_command == HL_ENGINE_CORE_HALT) ?
3885 					"HALT" : "RUN", core_ids[i]);
3886 				return -1;
3887 			}
3888 		}
3889 	}
3890 
3891 	return 0;
3892 }
3893 
3894 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3895 {
3896 	u32 wait_timeout_ms;
3897 
3898 	if (hdev->pldm)
3899 		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
3900 	else
3901 		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
3902 
3903 	if (fw_reset)
3904 		goto skip_engines;
3905 
3906 	gaudi2_stop_dma_qmans(hdev);
3907 	gaudi2_stop_mme_qmans(hdev);
3908 	gaudi2_stop_tpc_qmans(hdev);
3909 	gaudi2_stop_rot_qmans(hdev);
3910 	gaudi2_stop_nic_qmans(hdev);
3911 	msleep(wait_timeout_ms);
3912 
3913 	gaudi2_halt_arcs(hdev);
3914 	gaudi2_dma_stall(hdev);
3915 	gaudi2_mme_stall(hdev);
3916 	gaudi2_tpc_stall(hdev);
3917 	gaudi2_rotator_stall(hdev);
3918 
3919 	msleep(wait_timeout_ms);
3920 
3921 	gaudi2_stop_dec(hdev);
3922 
3923 	/*
3924 	 * in case of soft reset do a manual flush for QMANs (currently done
3925 	 * only for NIC QMANs)
3926 	 */
3927 	if (!hard_reset)
3928 		gaudi2_nic_qmans_manual_flush(hdev);
3929 
3930 	gaudi2_disable_dma_qmans(hdev);
3931 	gaudi2_disable_mme_qmans(hdev);
3932 	gaudi2_disable_tpc_qmans(hdev);
3933 	gaudi2_disable_rot_qmans(hdev);
3934 	gaudi2_disable_nic_qmans(hdev);
3935 	gaudi2_disable_timestamp(hdev);
3936 
3937 skip_engines:
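	/* On hard reset tear down MSI-X entirely; otherwise only wait for
	 * in-flight interrupt handlers to finish
	 */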
3938 	if (hard_reset) {
3939 		gaudi2_disable_msix(hdev);
3940 		return;
3941 	}
3942 
3943 	gaudi2_sync_irqs(hdev);
3944 }
3945 
3946 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
3947 {
3948 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3949 
3950 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3951 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3952 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3953 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3954 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3955 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
3956 }
3957 
3958 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
3959 {
3960 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3961 	struct dynamic_fw_load_mgr *dynamic_loader;
3962 	struct cpu_dyn_regs *dyn_regs;
3963 
3964 	/* fill common fields */
3965 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3966 	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
3967 	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
3968 	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
3969 	fw_loader->skip_bmc = false;
3970 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
3971 	fw_loader->dram_bar_id = DRAM_BAR_ID;
3972 	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
3973 
3974 	/* here we update the initial values of a few specific dynamic regs (as
3975 	 * before reading the first descriptor from the FW, those values have to
3976 	 * be hard-coded). in later stages of the protocol those values will be
3977 	 * updated automatically by reading the FW descriptor, so the data there
3978 	 * will always be up-to-date
3979 	 */
3980 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3981 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3982 	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3983 	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3984 	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
3985 }
3986 
3987 static int gaudi2_init_cpu(struct hl_device *hdev)
3988 {
3989 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3990 	int rc;
3991 
3992 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3993 		return 0;
3994 
3995 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
3996 		return 0;
3997 
3998 	rc = hl_fw_init_cpu(hdev);
3999 	if (rc)
4000 		return rc;
4001 
4002 	gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4003 
4004 	return 0;
4005 }
4006 
4007 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4008 {
4009 	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4010 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4011 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4012 	struct cpu_dyn_regs *dyn_regs;
4013 	struct hl_eq *eq;
4014 	u32 status;
4015 	int err;
4016 
4017 	if (!hdev->cpu_queues_enable)
4018 		return 0;
4019 
4020 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4021 		return 0;
4022 
4023 	eq = &hdev->event_queue;
4024 
4025 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4026 
4027 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4028 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4029 
4030 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4031 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4032 
4033 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4034 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4035 
4036 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4037 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4038 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4039 
4040 	/* Used for EQ CI */
4041 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4042 
4043 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4044 
4045 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4046 
4047 	/* Let the ARC know we are ready as it is now handling those queues  */
4048 
4049 	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4050 		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4051 
4052 	err = hl_poll_timeout(
4053 		hdev,
4054 		mmCPU_IF_QUEUE_INIT,
4055 		status,
4056 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4057 		1000,
4058 		cpu_timeout);
4059 
4060 	if (err) {
4061 		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4062 		return -EIO;
4063 	}
4064 
4065 	/* update FW application security bits */
4066 	if (prop->fw_cpu_boot_dev_sts0_valid)
4067 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4068 
4069 	if (prop->fw_cpu_boot_dev_sts1_valid)
4070 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4071 
4072 	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4073 	return 0;
4074 }
4075 
4076 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4077 				u32 queue_id_base)
4078 {
4079 	struct hl_hw_queue *q;
4080 	u32 pq_id, pq_offset;
4081 
4082 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4083 		q = &hdev->kernel_queues[queue_id_base + pq_id];
4084 		pq_offset = pq_id * 4;
4085 
4086 		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4087 				lower_32_bits(q->bus_address));
4088 		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4089 				upper_32_bits(q->bus_address));
4090 		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4091 		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4092 		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4093 	}
4094 }
4095 
4096 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4097 {
4098 	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4099 
4100 	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4101 	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4102 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4103 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4104 
4105 	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4106 		cp_offset = cp_id * 4;
4107 
4108 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4109 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
4110 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
4111 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
4112 	}
4113 
4114 	/* allow QMANs to accept work from ARC CQF */
4115 	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4116 }
4117 
4118 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4119 				u32 queue_id_base)
4120 {
4121 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4122 	u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4123 
4124 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4125 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4126 
4127 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4128 		pq_offset = pq_id * 4;
4129 
4130 		/* Configure the QMAN HBW completion address to point at the scratchpad, as it is not needed */
4131 		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4132 				lower_32_bits(gaudi2->scratchpad_bus_address));
4133 		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4134 				upper_32_bits(gaudi2->scratchpad_bus_address));
4135 		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4136 				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4137 
4138 		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4139 		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4140 		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4141 		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4142 	}
4143 
4144 	/* Enable QMAN H/W completion */
4145 	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4146 }
4147 
4148 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4149 {
4150 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4151 	u32 sp_reg_addr;
4152 
4153 	switch (queue_id_base) {
4154 	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4155 		fallthrough;
4156 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4157 		fallthrough;
4158 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4159 		fallthrough;
4160 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4161 		fallthrough;
4162 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4163 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4164 		break;
4165 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4166 		fallthrough;
4167 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4168 		fallthrough;
4169 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4170 		fallthrough;
4171 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4172 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4173 		break;
4174 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4175 		fallthrough;
4176 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4177 		fallthrough;
4178 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4179 		fallthrough;
4180 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4181 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4182 		break;
4183 	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4184 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4185 		break;
4186 	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4187 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4188 		break;
4189 	default:
4190 		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4191 		return 0;
4192 	}
4193 
4194 	return sp_reg_addr;
4195 }
4196 
4197 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4198 					u32 queue_id_base)
4199 {
4200 	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4201 	int map_table_entry;
4202 
4203 	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4204 
4205 	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4206 	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4207 	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4208 
4209 	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4210 	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4211 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4212 
4213 	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4214 
4215 	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4216 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4217 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4218 
4219 	/* Enable the QMAN channel.
4220 	 * PDMA QMAN configuration is different, as we do not allow user to
4221 	 * access some of the CPs.
4222 	 * PDMA0: CP2/3 are reserved for ARC usage.
4223 	 * PDMA1: CP1/2/3 are reserved for ARC usage.
4224 	 */
4225 	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4226 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4227 	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4228 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4229 	else
4230 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4231 }
4232 
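/*
 * Full initialization of a single QMAN: PQs, CPs, PQC and the common block.
 * All PQs of the QMAN are initially bound to the reserved CQ that is used for
 * CS completions.
 */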
4233 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4234 		u32 queue_id_base)
4235 {
4236 	u32 pq_id;
4237 
4238 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
4239 		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
4240 
4241 	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
4242 	gaudi2_init_qman_cp(hdev, reg_base);
4243 	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
4244 	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
4245 }
4246 
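/*
 * DMA core initialization: the PROT register always gets the error-value bit,
 * and the protection-value bit is set in addition only for secure users (in
 * this driver, only the KDMA). Error messages are routed to the GIC DMA-core
 * IRQ-control register with the core's CPU event ID as write data, and then
 * the channel is enabled.
 */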
4247 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
4248 				u32 dma_core_id, bool is_secure)
4249 {
4250 	u32 prot, irq_handler_offset;
4251 	struct cpu_dyn_regs *dyn_regs;
4252 	int map_table_entry;
4253 
4254 	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
4255 	if (is_secure)
4256 		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
4257 
4258 	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
4259 
4260 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4261 	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
4262 
4263 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
4264 			lower_32_bits(CFG_BASE + irq_handler_offset));
4265 
4266 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
4267 			upper_32_bits(CFG_BASE + irq_handler_offset));
4268 
4269 	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
4270 	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
4271 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4272 
4273 	/* Enable the DMA channel */
4274 	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
4275 }
4276 
4277 static void gaudi2_init_kdma(struct hl_device *hdev)
4278 {
4279 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4280 	u32 reg_base;
4281 
4282 	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
4283 		return;
4284 
4285 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
4286 
4287 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
4288 
4289 	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
4290 }
4291 
4292 static void gaudi2_init_pdma(struct hl_device *hdev)
4293 {
4294 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4295 	u32 reg_base;
4296 
4297 	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
4298 		return;
4299 
4300 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
4301 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
4302 
4303 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
4304 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
4305 
4306 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
4307 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
4308 
4309 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
4310 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
4311 
4312 	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
4313 }
4314 
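/*
 * A single EDMA instance is a DMA core plus its QMAN. The linear sequence
 * number (dcore * NUM_OF_EDMA_PER_DCORE + inst) selects both the DMA core ID
 * and, through edma_stream_base[], the base queue ID of the matching QMAN.
 */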
4315 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
4316 {
4317 	u32 reg_base, base_edma_core_id, base_edma_qman_id;
4318 
4319 	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
4320 	base_edma_qman_id = edma_stream_base[seq];
4321 
4322 	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
4323 	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
4324 
4325 	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
4326 	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
4327 }
4328 
4329 static void gaudi2_init_edma(struct hl_device *hdev)
4330 {
4331 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4332 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4333 	int dcore, inst;
4334 
4335 	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
4336 		return;
4337 
4338 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4339 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4340 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4341 
4342 			if (!(prop->edma_enabled_mask & BIT(seq)))
4343 				continue;
4344 
4345 			gaudi2_init_edma_instance(hdev, seq);
4346 
4347 			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
4348 		}
4349 	}
4350 }
4351 
4352 /*
4353  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
4354  * @hdev: pointer to habanalabs device structure.
4355  * @sob_id: sync object ID.
4356  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
4357  * @interrupt_id: interrupt ID.
4358  *
4359  * Some initiators cannot have HBW address in their completion address registers, and thus cannot
4360  * write directly to the HBW host memory of the virtual MSI-X doorbell.
4361  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
4362  *
4363  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
4364  * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
4365  * completion, by decrementing the sync object value and re-arming the monitor.
4366  */
4367 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
4368 							u32 first_mon_id, u32 interrupt_id)
4369 {
4370 	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
4371 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4372 	u64 addr;
4373 	u8 mask;
4374 
4375 	/* Reset the SOB value */
4376 	sob_offset = sob_id * sizeof(u32);
4377 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
4378 
4379 	/* Configure 3 monitors:
4380 	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
4381 	 * 2. Decrement SOB value by 1.
4382 	 * 3. Re-arm the master monitor.
4383 	 */
4384 
4385 	first_mon_offset = first_mon_id * sizeof(u32);
4386 
4387 	/* 2nd monitor: Decrement SOB value by 1 */
4388 	mon_offset = first_mon_offset + sizeof(u32);
4389 
4390 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
4391 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4392 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4393 
4394 	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
4395 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
4396 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
4397 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4398 
4399 	/* 3rd monitor: Re-arm the master monitor */
4400 	mon_offset = first_mon_offset + 2 * sizeof(u32);
4401 
4402 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
4403 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4404 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4405 
4406 	sob_group = sob_id / 8;
4407 	mask = ~BIT(sob_id & 0x7);
4408 	mode = 0; /* comparison mode is "greater than or equal to" */
4409 	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
4410 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
4411 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
4412 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
4413 
4414 	payload = arm;
4415 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4416 
4417 	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
4418 	mon_offset = first_mon_offset;
4419 
4420 	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
4421 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
4422 
4423 	addr = gaudi2->virt_msix_db_dma_addr;
4424 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4425 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4426 
4427 	payload = interrupt_id;
4428 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4429 
4430 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
4431 }
4432 
4433 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
4434 {
4435 	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
4436 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4437 
4438 	/* Decoder normal/abnormal interrupts */
4439 	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
4440 		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
4441 			continue;
4442 
4443 		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4444 		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
4445 		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
4446 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4447 
4448 		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4449 		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
4450 		interrupt_id += 1;
4451 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4452 	}
4453 }
4454 
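/*
 * Sync manager initialization: enable HBW CQ and LBW writes for the
 * completion monitors (one per pending CS) and an HBW-only CQ write for the
 * KDMA completion monitor, point the CQ0 LBW doorbell at the virtual MSI-X
 * doorbell with the completion interrupt ID, program the base address and
 * log2 size of every reserved CQ, set the kernel ASID / MMU bypass registers
 * and finally arm the monitors used for the decoders' virtual MSI-X doorbell.
 */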
4455 static void gaudi2_init_sm(struct hl_device *hdev)
4456 {
4457 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4458 	u64 cq_address;
4459 	u32 reg_val;
4460 	int i;
4461 
4462 	/* Enable HBW/LBW CQ for completion monitors */
4463 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4464 	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
4465 
4466 	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
4467 		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4468 
4469 	/* Enable only HBW CQ for KDMA completion monitor */
4470 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4471 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4472 
4473 	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
4474 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
4475 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
4476 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
4477 
4478 	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
4479 		cq_address =
4480 			hdev->completion_queue[i].bus_address;
4481 
4482 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
4483 							lower_32_bits(cq_address));
4484 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
4485 							upper_32_bits(cq_address));
4486 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
4487 							ilog2(HL_CQ_SIZE_IN_BYTES));
4488 	}
4489 
4490 	/* Configure kernel ASID and MMU bypass (BP) */
4491 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
4492 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
4493 
4494 	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
4495 	gaudi2_prepare_sm_for_virt_msix_db(hdev);
4496 }
4497 
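/*
 * MME accumulator initialization: keep the WBC error-response interrupt
 * unmasked while masking the AP source/result infinity and NaN indications,
 * then program the LFSR polynomial and the per-seed random values stored in
 * gaudi2->lfsr_rand_seeds.
 */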
4498 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
4499 {
4500 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4501 	u32 reg_val;
4502 	int i;
4503 
4504 	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
4505 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
4506 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
4507 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
4508 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
4509 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
4510 
4511 	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
4512 	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
4513 
4514 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
4515 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
4516 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
4517 	}
4518 }
4519 
4520 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
4521 							bool config_qman_only)
4522 {
4523 	u32 queue_id_base, reg_base;
4524 
4525 	switch (dcore_id) {
4526 	case 0:
4527 		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
4528 		break;
4529 	case 1:
4530 		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
4531 		break;
4532 	case 2:
4533 		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
4534 		break;
4535 	case 3:
4536 		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
4537 		break;
4538 	default:
4539 		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
4540 		return;
4541 	}
4542 
4543 	if (!config_qman_only) {
4544 		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
4545 		gaudi2_init_mme_acc(hdev, reg_base);
4546 	}
4547 
4548 	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
4549 	gaudi2_init_qman(hdev, reg_base, queue_id_base);
4550 }
4551 
4552 static void gaudi2_init_mme(struct hl_device *hdev)
4553 {
4554 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4555 	int i;
4556 
4557 	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
4558 		return;
4559 
4560 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
4561 		gaudi2_init_dcore_mme(hdev, i, false);
4562 
4563 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
4564 	}
4565 }
4566 
4567 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
4568 {
4569 	/* Mask arithmetic and QM interrupts in TPC */
4570 	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
4571 
4572 	/* Set 16 cache lines */
4573 	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
4574 			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
4575 }
4576 
4577 struct gaudi2_tpc_init_cfg_data {
4578 	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
4579 };
4580 
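/*
 * Per-TPC initialization callback, invoked via gaudi2_iterate_tpcs(): derives
 * the TPC's base queue ID from the per-dcore base plus the instance offset,
 * configures the TPC CFG block and its QMAN, and marks the matching
 * capability bit. DCORE0's extra TPC (TPC6) takes the last sequence index,
 * after all NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE regular TPCs.
 */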
4581 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
4582 					u32 offset, struct iterate_module_ctx *ctx)
4583 {
4584 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4585 	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
4586 	u32 queue_id_base;
4587 	u8 seq;
4588 
4589 	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
4590 
4591 	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
4592 		/* gets last sequence number */
4593 		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
4594 	else
4595 		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
4596 
4597 	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
4598 	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
4599 
4600 	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
4601 }
4602 
4603 static void gaudi2_init_tpc(struct hl_device *hdev)
4604 {
4605 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4606 	struct gaudi2_tpc_init_cfg_data init_cfg_data;
4607 	struct iterate_module_ctx tpc_iter;
4608 
4609 	if (!hdev->asic_prop.tpc_enabled_mask)
4610 		return;
4611 
4612 	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
4613 		return;
4614 
4615 	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
4616 	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
4617 	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
4618 	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
4619 	tpc_iter.fn = &gaudi2_init_tpc_config;
4620 	tpc_iter.data = &init_cfg_data;
4621 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
4622 }
4623 
4624 static void gaudi2_init_rotator(struct hl_device *hdev)
4625 {
4626 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4627 	u32 i, reg_base, queue_id;
4628 
4629 	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
4630 
4631 	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4632 		reg_base = gaudi2_qm_blocks_bases[queue_id];
4633 		gaudi2_init_qman(hdev, reg_base, queue_id);
4634 
4635 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
4636 	}
4637 }
4638 
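/*
 * The decoders are among the initiators that cannot write the virtual MSI-X
 * doorbell directly (see gaudi2_arm_monitors_for_virt_msix_db()). Their
 * bridge-control MSI-X registers are therefore pointed at the reserved sync
 * objects, and the armed monitors translate the SOB increment into a doorbell
 * write with the proper interrupt ID.
 */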
4639 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
4640 {
4641 	u32 sob_id;
4642 
4643 	/* VCMD normal interrupt */
4644 	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4645 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
4646 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4647 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4648 
4649 	/* VCMD abnormal interrupt */
4650 	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4651 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
4652 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4653 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4654 }
4655 
4656 static void gaudi2_init_dec(struct hl_device *hdev)
4657 {
4658 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4659 	u32 dcore_id, dec_id, dec_bit;
4660 	u64 base_addr;
4661 
4662 	if (!hdev->asic_prop.decoder_enabled_mask)
4663 		return;
4664 
4665 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
4666 		return;
4667 
4668 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4669 		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4670 			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4671 
4672 			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4673 				continue;
4674 
4675 			base_addr =  mmDCORE0_DEC0_CMD_BASE +
4676 					BRDG_CTRL_BLOCK_OFFSET +
4677 					dcore_id * DCORE_OFFSET +
4678 					dec_id * DCORE_VDEC_OFFSET;
4679 
4680 			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4681 
4682 			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4683 		}
4684 
4685 	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
4686 		dec_bit = PCIE_DEC_SHIFT + dec_id;
4687 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4688 			continue;
4689 
4690 		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
4691 				dec_id * DCORE_VDEC_OFFSET;
4692 
4693 		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4694 
4695 		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4696 	}
4697 }
4698 
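/*
 * Program the hop0 table physical address of a single ASID into the STLB:
 * write the ASID and the PA (split into bits 43:12 and 63:44), latch the
 * update by setting the busy bit (bit 31) and poll until the HW clears it.
 */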
4699 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
4700 					u32 stlb_base, u32 asid, u64 phys_addr)
4701 {
4702 	u32 status, timeout_usec;
4703 	int rc;
4704 
4705 	if (hdev->pldm || !hdev->pdev)
4706 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
4707 	else
4708 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4709 
4710 	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
4711 	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
4712 	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
4713 	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
4714 
4715 	rc = hl_poll_timeout(
4716 		hdev,
4717 		stlb_base + STLB_BUSY_OFFSET,
4718 		status,
4719 		!(status & 0x80000000),
4720 		1000,
4721 		timeout_usec);
4722 
4723 	if (rc) {
4724 		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
4725 		return rc;
4726 	}
4727 
4728 	return 0;
4729 }
4730 
4731 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
4732 					u32 start_offset, u32 inv_start_val,
4733 					u32 flags)
4734 {
4735 	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
4736 	if (flags & MMU_OP_CLEAR_MEMCACHE)
4737 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
4738 
4739 	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
4740 		return;
4741 
4742 	WREG32(stlb_base + start_offset, inv_start_val);
4743 }
4744 
4745 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
4746 						struct gaudi2_cache_invld_params *inv_params)
4747 {
4748 	u32 status, timeout_usec, start_offset;
4749 	int rc;
4750 
4751 	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
4752 					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
4753 
4754 	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
4755 	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
4756 		rc = hl_poll_timeout(
4757 			hdev,
4758 			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
4759 			status,
4760 			status & 0x1,
4761 			1000,
4762 			timeout_usec);
4763 
4764 		if (rc)
4765 			return rc;
4766 
4767 		/* Need to manually reset the status to 0 */
4768 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
4769 	}
4770 
4771 	/* Lower cache does not work with cache lines, hence we can skip its
4772 	 * invalidation upon map and invalidate only upon unmap
4773 	 */
4774 	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
4775 		return 0;
4776 
4777 	start_offset = inv_params->range_invalidation ?
4778 			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
4779 
4780 	rc = hl_poll_timeout(
4781 		hdev,
4782 		stlb_base + start_offset,
4783 		status,
4784 		!(status & 0x1),
4785 		1000,
4786 		timeout_usec);
4787 
4788 	return rc;
4789 }
4790 
4791 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
4792 {
4793 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4794 	u32 hw_cap;
4795 
4796 	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
4797 
4798 	if (gaudi2->hw_cap_initialized & hw_cap)
4799 		return true;
4800 
4801 	return false;
4802 }
4803 
4804 /* This function shall be called only for HMMUs whose capability bit is set */
4805 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
4806 {
4807 	u32 offset;
4808 
4809 	offset =  (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
4810 	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
4811 }
4812 
4813 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
4814 						struct gaudi2_cache_invld_params *inv_params)
4815 {
4816 	u32 start_offset;
4817 
4818 	if (inv_params->range_invalidation) {
4819 		/* Set the addresses range.
4820 		 * Note: the start address that we set in the register is, by design,
4821 		 * not included in the invalidation range.
4822 		 * That's why we set an address lower than the one we actually
4823 		 * want to be included in the range invalidation.
4824 		 */
4825 		u64 start = inv_params->start_va - 1;
4826 
4827 		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
4828 
4829 		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
4830 				start >> MMU_RANGE_INV_VA_LSB_SHIFT);
4831 
4832 		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
4833 				start >> MMU_RANGE_INV_VA_MSB_SHIFT);
4834 
4835 		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
4836 				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
4837 
4838 		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
4839 				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
4840 	} else {
4841 		start_offset = STLB_INV_ALL_START_OFFSET;
4842 	}
4843 
4844 	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
4845 						inv_params->inv_start_val, inv_params->flags);
4846 }
4847 
4848 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
4849 						int dcore_id, int hmmu_id,
4850 						struct gaudi2_cache_invld_params *inv_params)
4851 {
4852 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4853 
4854 	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
4855 }
4856 
4857 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
4858 						int dcore_id, int hmmu_id,
4859 						struct gaudi2_cache_invld_params *inv_params)
4860 {
4861 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4862 
4863 	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
4864 }
4865 
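/*
 * Invalidate the caches of all enabled HMMUs in two passes: first trigger the
 * invalidation on every HMMU, then poll each one for completion, so that the
 * invalidations can progress in parallel rather than serially.
 */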
4866 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
4867 						struct gaudi2_cache_invld_params *inv_params)
4868 {
4869 	int dcore_id, hmmu_id;
4870 
4871 	/* first send all invalidation commands */
4872 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4873 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4874 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4875 				continue;
4876 
4877 			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
4878 		}
4879 	}
4880 
4881 	/* next, poll all invalidations status */
4882 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4883 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4884 			int rc;
4885 
4886 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4887 				continue;
4888 
4889 			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
4890 										inv_params);
4891 			if (rc)
4892 				return rc;
4893 		}
4894 	}
4895 
4896 	return 0;
4897 }
4898 
4899 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
4900 {
4901 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4902 	struct gaudi2_cache_invld_params invld_params;
4903 	int rc = 0;
4904 
4905 	if (hdev->reset_info.hard_reset_pending)
4906 		return rc;
4907 
4908 	invld_params.range_invalidation = false;
4909 	invld_params.inv_start_val = 1;
4910 
4911 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
4912 		invld_params.flags = flags;
4913 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
4914 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
4915 										&invld_params);
4916 	} else if (flags & MMU_OP_PHYS_PACK) {
4917 		invld_params.flags = 0;
4918 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
4919 	}
4920 
4921 	return rc;
4922 }
4923 
4924 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
4925 				u32 flags, u32 asid, u64 va, u64 size)
4926 {
4927 	struct gaudi2_cache_invld_params invld_params = {0};
4928 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4929 	u64 start_va, end_va;
4930 	u32 inv_start_val;
4931 	int rc = 0;
4932 
4933 	if (hdev->reset_info.hard_reset_pending)
4934 		return 0;
4935 
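	/* Range-invalidation enable, ASID-filter enable and the ASID to filter on */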
4936 	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
4937 			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
4938 			asid << MMU_RANGE_INV_ASID_SHIFT);
4939 	start_va = va;
4940 	end_va = start_va + size;
4941 
4942 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
4943 		/* As range invalidation does not support a zero address, we do
4944 		 * a full invalidation in this case
4945 		 */
4946 		if (start_va) {
4947 			invld_params.range_invalidation = true;
4948 			invld_params.start_va = start_va;
4949 			invld_params.end_va = end_va;
4950 			invld_params.inv_start_val = inv_start_val;
4951 			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
4952 		} else {
4953 			invld_params.range_invalidation = false;
4954 			invld_params.inv_start_val = 1;
4955 			invld_params.flags = flags;
4956 		}
4957 
4958 
4959 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
4960 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
4961 										&invld_params);
4962 		if (rc)
4963 			return rc;
4964 
4965 	} else if (flags & MMU_OP_PHYS_PACK) {
4966 		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
4967 		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
4968 		invld_params.inv_start_val = inv_start_val;
4969 		invld_params.flags = flags;
4970 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
4971 	}
4972 
4973 	return rc;
4974 }
4975 
4976 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
4977 {
4978 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4979 	u64 hop0_addr;
4980 	u32 asid, max_asid = prop->max_asid;
4981 	int rc;
4982 
4983 	/* it takes too much time to init all of the ASIDs on palladium */
4984 	if (hdev->pldm)
4985 		max_asid = min((u32) 8, max_asid);
4986 
4987 	for (asid = 0 ; asid < max_asid ; asid++) {
4988 		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
4989 		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
4990 		if (rc) {
4991 			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
4992 			return rc;
4993 		}
4994 	}
4995 
4996 	return 0;
4997 }
4998 
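/*
 * Common MMU bring-up for both PMMU and HMMUs: kick an invalidate-all, wait
 * for the STLB SRAM init to finish, program the hop0 address of every ASID,
 * take the MMU out of bypass, wait for the invalidate-all to complete and
 * finally enable the MMU.
 */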
4999 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5000 {
5001 	u32 status, timeout_usec;
5002 	int rc;
5003 
5004 	if (hdev->pldm || !hdev->pdev)
5005 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5006 	else
5007 		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5008 
5009 	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5010 
5011 	rc = hl_poll_timeout(
5012 		hdev,
5013 		stlb_base + STLB_SRAM_INIT_OFFSET,
5014 		status,
5015 		!status,
5016 		1000,
5017 		timeout_usec);
5018 
5019 	if (rc)
5020 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5021 
5022 	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5023 	if (rc)
5024 		return rc;
5025 
5026 	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5027 
5028 	rc = hl_poll_timeout(
5029 		hdev,
5030 		stlb_base + STLB_INV_ALL_START_OFFSET,
5031 		status,
5032 		!status,
5033 		1000,
5034 		timeout_usec);
5035 
5036 	if (rc)
5037 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5038 
5039 	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5040 
5041 	return rc;
5042 }
5043 
5044 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5045 {
5046 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5047 	u32 mmu_base, stlb_base;
5048 	int rc;
5049 
5050 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5051 		return 0;
5052 
5053 	mmu_base = mmPMMU_HBW_MMU_BASE;
5054 	stlb_base = mmPMMU_HBW_STLB_BASE;
5055 
5056 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5057 		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5058 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5059 		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5060 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5061 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5062 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5063 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5064 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5065 		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5066 		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5067 
5068 	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5069 
5070 	if (PAGE_SIZE == SZ_64K) {
5071 		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5072 		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5073 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5074 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5075 			FIELD_PREP(
5076 				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5077 				1),
5078 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5079 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5080 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5081 	}
5082 
5083 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5084 
5085 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5086 	if (rc)
5087 		return rc;
5088 
5089 	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5090 
5091 	return 0;
5092 }
5093 
5094 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5095 				int hmmu_id)
5096 {
5097 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5098 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5099 	u32 offset, mmu_base, stlb_base, hw_cap;
5100 	u8 dmmu_seq;
5101 	int rc;
5102 
5103 	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5104 	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5105 
5106 	/*
5107 	 * return if DMMU is already initialized or if it's not out of
5108 	 * isolation (due to cluster binning)
5109 	 */
5110 	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5111 		return 0;
5112 
5113 	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5114 	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5115 	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5116 
5117 	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5118 			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5119 
5120 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5121 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5122 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5123 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5124 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5125 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5126 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5127 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5128 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5129 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5130 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5131 
5132 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5133 			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5134 
5135 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5136 
5137 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5138 	if (rc)
5139 		return rc;
5140 
5141 	gaudi2->hw_cap_initialized |= hw_cap;
5142 
5143 	return 0;
5144 }
5145 
5146 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5147 {
5148 	int rc, dcore_id, hmmu_id;
5149 
5150 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5151 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5152 			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5153 			if (rc)
5154 				return rc;
5155 		}
5156 
5157 	return 0;
5158 }
5159 
5160 static int gaudi2_mmu_init(struct hl_device *hdev)
5161 {
5162 	int rc;
5163 
5164 	rc = gaudi2_pci_mmu_init(hdev);
5165 	if (rc)
5166 		return rc;
5167 
5168 	rc = gaudi2_hbm_mmu_init(hdev);
5169 	if (rc)
5170 		return rc;
5171 
5172 	return 0;
5173 }
5174 
5175 static int gaudi2_hw_init(struct hl_device *hdev)
5176 {
5177 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5178 	int rc;
5179 
5180 	/* Let's mark in the H/W that we have reached this point. We check
5181 	 * this value in the reset_before_init function to understand whether
5182 	 * we need to reset the chip before doing H/W init. This register is
5183 	 * cleared by the H/W upon H/W reset
5184 	 */
5185 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5186 
5187 	/* Perform read from the device to make sure device is up */
5188 	RREG32(mmHW_STATE);
5189 
5190 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5191 	 * So we set it here and if anyone tries to move it later to
5192 	 * a different address, there will be an error
5193 	 */
5194 	if (hdev->asic_prop.iatu_done_by_fw)
5195 		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5196 
5197 	/*
5198 	 * Before pushing u-boot/Linux to the device, we need to set the HBM BAR
5199 	 * to the DRAM base address
5200 	 */
5201 	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5202 		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5203 		return -EIO;
5204 	}
5205 
5206 	rc = gaudi2_init_cpu(hdev);
5207 	if (rc) {
5208 		dev_err(hdev->dev, "failed to initialize CPU\n");
5209 		return rc;
5210 	}
5211 
5212 	gaudi2_init_scrambler_hbm(hdev);
5213 	gaudi2_init_kdma(hdev);
5214 
5215 	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5216 	if (rc) {
5217 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5218 		return rc;
5219 	}
5220 
5221 	rc = gaudi2->cpucp_info_get(hdev);
5222 	if (rc) {
5223 		dev_err(hdev->dev, "Failed to get cpucp info\n");
5224 		return rc;
5225 	}
5226 
5227 	rc = gaudi2_mmu_init(hdev);
5228 	if (rc)
5229 		return rc;
5230 
5231 	gaudi2_init_pdma(hdev);
5232 	gaudi2_init_edma(hdev);
5233 	gaudi2_init_sm(hdev);
5234 	gaudi2_init_tpc(hdev);
5235 	gaudi2_init_mme(hdev);
5236 	gaudi2_init_rotator(hdev);
5237 	gaudi2_init_dec(hdev);
5238 	gaudi2_enable_timestamp(hdev);
5239 
5240 	rc = gaudi2_coresight_init(hdev);
5241 	if (rc)
5242 		goto disable_queues;
5243 
5244 	rc = gaudi2_enable_msix(hdev);
5245 	if (rc)
5246 		goto disable_queues;
5247 
5248 	/* Perform read from the device to flush all configuration */
5249 	RREG32(mmHW_STATE);
5250 
5251 	return 0;
5252 
5253 disable_queues:
5254 	gaudi2_disable_dma_qmans(hdev);
5255 	gaudi2_disable_mme_qmans(hdev);
5256 	gaudi2_disable_tpc_qmans(hdev);
5257 	gaudi2_disable_rot_qmans(hdev);
5258 	gaudi2_disable_nic_qmans(hdev);
5259 
5260 	gaudi2_disable_timestamp(hdev);
5261 
5262 	return rc;
5263 }
5264 
5265 /**
5266  * gaudi2_send_hard_reset_cmd - common function to handle reset
5267  *
5268  * @hdev: pointer to the habanalabs device structure
5269  *
5270  * This function handles the various possible scenarios for reset.
5271  * It considers if reset is handled by driver\FW and what FW components are loaded
5272  */
5273 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
5274 {
5275 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5276 	bool heartbeat_reset, preboot_only, cpu_initialized = false;
5277 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5278 	u32 cpu_boot_status;
5279 
5280 	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
5281 	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
5282 
5283 	/*
5284 	 * Handle the corner case where the failure occurred while loading the CPU
5285 	 * management app, but the driver did not detect any failure during FW load.
5286 	 * In such a scenario the driver sends only HALT_MACHINE, and no one responds
5287 	 * to this request since the FW has already returned to preboot and cannot
5288 	 * handle such a command.
5289 	 * In this case, the next time the management app loads it checks the events
5290 	 * register, which still holds the halt indication, and reboots the device.
5291 	 * The solution is to let preboot clear all relevant registers before the next
5292 	 * boot, once the driver sends COMMS_RST_DEV.
5293 	 */
5294 	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
5295 
5296 	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
5297 			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
5298 		cpu_initialized = true;
5299 
5300 	/*
5301 	 * When Linux/boot fit exists, this write to the SP can be interpreted in 2 ways:
5302 	 * 1. FW reset: FW initiates the reset sequence
5303 	 * 2. driver reset: FW starts the HALT sequence (the preparations for the
5304 	 *                  reset but not the reset itself, as that is not implemented
5305 	 *                  on its side) and LKD waits to let FW complete the
5306 	 *                  sequence before issuing the reset
5307 	 */
5308 	if (!preboot_only && cpu_initialized) {
5309 		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
5310 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
5311 
5312 		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
5313 	}
5314 
5315 	/*
5316 	 * When working with preboot (without Linux/boot fit) we can
5317 	 * communicate only using the COMMS commands to issue halt/reset.
5318 	 *
5319 	 * For the case in which we are working with Linux/boot fit, this is a hail-mary
5320 	 * attempt to revive the card in the small chance that the f/w has
5321 	 * experienced a watchdog event, which caused it to return to preboot.
5322 	 * In that case, triggering the reset through the GIC won't help. We need to
5323 	 * trigger the reset as if Linux wasn't loaded.
5324 	 *
5325 	 * We do it only if the reset cause was HB, because that would be the
5326 	 * indication of such an event.
5327 	 *
5328 	 * In case the watchdog hasn't expired but we still got a HB failure, this
5329 	 * won't do any damage.
5330 	 */
5331 
5332 	if (heartbeat_reset || preboot_only || !cpu_initialized) {
5333 		if (hdev->asic_prop.hard_reset_done_by_fw)
5334 			hl_fw_ask_hard_reset_without_linux(hdev);
5335 		else
5336 			hl_fw_ask_halt_machine_without_linux(hdev);
5337 	}
5338 }
5339 
5340 /**
5341  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
5342  *
5343  * @hdev: pointer to the habanalabs device structure
5344  * @reset_sleep_ms: sleep time in msec after reset
5345  *
5346  * This function executes hard reset based on if driver/FW should do the reset
5347  */
5348 static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms)
5349 {
5350 	if (hdev->asic_prop.hard_reset_done_by_fw) {
5351 		gaudi2_send_hard_reset_cmd(hdev);
5352 		return;
5353 	}
5354 
5355 	/* Set device to handle FLR by H/W as we will put the device
5356 	 * CPU to halt mode
5357 	 */
5358 	WREG32(mmPCIE_AUX_FLR_CTRL,
5359 			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
5360 
5361 	gaudi2_send_hard_reset_cmd(hdev);
5362 
5363 	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
5364 }
5365 
5366 /**
5367  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
5368  *
5369  * @hdev: pointer to the habanalabs device structure
5370  * @reset_sleep_ms: sleep time in msec after reset
5371  * @driver_performs_reset: true if driver should perform reset instead of f/w.
5372  *
5373  * This function executes soft reset based on if driver/FW should do the reset
5374  */
5375 static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms,
5376 						bool driver_performs_reset)
5377 {
5378 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5379 
5380 	if (!driver_performs_reset) {
5381 		/* set SP to indicate reset request sent to FW */
5382 		if (dyn_regs->cpu_rst_status)
5383 			WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
5384 		else
5385 			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
5386 
5387 		WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
5388 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
5389 		return;
5390 	}
5391 
5392 	/* Block access to engines, QMANs and SM during reset; these
5393 	 * RRs will be reconfigured after the soft reset.
5394 	 * PCIE_MSIX is left unsecured to allow NIC packet processing during the reset.
5395 	 */
5396 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
5397 					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
5398 
5399 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
5400 				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
5401 				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
5402 
5403 	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
5404 }
5405 
5406 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
5407 								u32 poll_timeout_us)
5408 {
5409 	int i, rc = 0;
5410 	u32 reg_val;
5411 
5412 	/* Without this sleep, the reset will not work */
5413 	msleep(reset_sleep_ms);
5414 
5415 	/* We poll the BTM done indication multiple times after reset due to
5416 	 * a HW errata 'GAUDI2_0300'
5417 	 */
5418 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5419 		rc = hl_poll_timeout(
5420 			hdev,
5421 			mmPSOC_GLOBAL_CONF_BTM_FSM,
5422 			reg_val,
5423 			reg_val == 0,
5424 			1000,
5425 			poll_timeout_us);
5426 
5427 	if (rc)
5428 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
5429 }
5430 
5431 static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
5432 {
5433 	int i, rc = 0;
5434 	u32 reg_val;
5435 
5436 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5437 		rc = hl_poll_timeout(
5438 			hdev,
5439 			mmCPU_RST_STATUS_TO_HOST,
5440 			reg_val,
5441 			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
5442 			1000,
5443 			poll_timeout_us);
5444 
5445 	if (rc)
5446 		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
5447 				reg_val);
5448 }
5449 
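/*
 * H/W teardown: pick the sleep/poll timeouts (much longer on Palladium),
 * reset the ARCs and then execute a hard or soft reset, performed either by
 * the driver or by the FW. Afterwards, wait for the proper "done" indication
 * (preboot ready / BTM / soft-reset status) and clear the capability bits of
 * the blocks that will have to be reconfigured on the next hw_init.
 */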
5450 static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
5451 {
5452 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5453 	u32 poll_timeout_us, reset_sleep_ms;
5454 	bool driver_performs_reset = false;
5455 
5456 	if (hdev->pldm) {
5457 		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
5458 						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
5459 		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
5460 	} else {
5461 		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
5462 		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
5463 	}
5464 
5465 	if (fw_reset)
5466 		goto skip_reset;
5467 
5468 	gaudi2_reset_arcs(hdev);
5469 
5470 	if (hard_reset) {
5471 		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
5472 		gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
5473 	} else {
5474 		/*
5475 		 * As we also have to support working with preboot only (which does not support
5476 		 * soft reset), we have to make sure that security is disabled before letting the
5477 		 * driver do the reset. The user shall control the BFE flags to avoid requesting a
5478 		 * soft reset on a secured device with preboot only.
5479 		 */
5480 		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
5481 							!hdev->asic_prop.fw_security_enabled);
5482 		gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
5483 	}
5484 
5485 skip_reset:
5486 	if (driver_performs_reset || hard_reset)
5487 		/*
5488 		 * Instead of waiting for BTM indication we should wait for preboot ready:
5489 		 * Consider the below scenario:
5490 		 * 1. FW update is being triggered
5491 		 *        - setting the dirty bit
5492 		 * 2. hard reset will be triggered due to the dirty bit
5493 		 * 3. FW initiates the reset:
5494 		 *        - dirty bit cleared
5495 		 *        - BTM indication cleared
5496 		 *        - preboot ready indication cleared
5497 		 * 4. during hard reset:
5498 		 *        - BTM indication will be set
5499 		 *        - BIST test performed and another reset triggered
5500 		 * 5. only after this reset the preboot will set the preboot ready
5501 		 *
5502 		 * When polling on the BTM indication alone we can lose sync with FW while trying
5503 		 * to communicate with FW that is in the middle of a reset.
5504 		 * To overcome this we always wait for the preboot ready indication.
5505 		 */
5506 		if ((hdev->fw_components & FW_TYPE_PREBOOT_CPU)) {
5507 			msleep(reset_sleep_ms);
5508 			hl_fw_wait_preboot_ready(hdev);
5509 		} else {
5510 			gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us);
5511 		}
5512 	else
5513 		gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
5514 
5515 	if (!gaudi2)
5516 		return;
5517 
5518 	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
5519 	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
5520 
5521 	/*
5522 	 * Clear the NIC capability mask in order for the driver to re-configure
5523 	 * the NIC QMANs. NIC ports will not be re-configured during soft
5524 	 * reset, as we call gaudi2_nic_init only during hard reset
5525 	 */
5526 	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
5527 
5528 	if (hard_reset) {
5529 		gaudi2->hw_cap_initialized &=
5530 			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
5531 			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
5532 			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
5533 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
5534 			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
5535 
5536 		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
5537 	} else {
5538 		gaudi2->hw_cap_initialized &=
5539 			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
5540 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
5541 			HW_CAP_ROT_MASK);
5542 	}
5543 }
5544 
5545 static int gaudi2_suspend(struct hl_device *hdev)
5546 {
5547 	int rc;
5548 
5549 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
5550 	if (rc)
5551 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
5552 
5553 	return rc;
5554 }
5555 
5556 static int gaudi2_resume(struct hl_device *hdev)
5557 {
5558 	return gaudi2_init_iatu(hdev);
5559 }
5560 
5561 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
5562 		void *cpu_addr, dma_addr_t dma_addr, size_t size)
5563 {
5564 	int rc;
5565 
5566 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
5567 			VM_DONTCOPY | VM_NORESERVE;
5568 
5569 #ifdef _HAS_DMA_MMAP_COHERENT
5570 
5571 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
5572 	if (rc)
5573 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
5574 
5575 #else
5576 
5577 	rc = remap_pfn_range(vma, vma->vm_start,
5578 				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
5579 				size, vma->vm_page_prot);
5580 	if (rc)
5581 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
5582 
5583 #endif
5584 
5585 	return rc;
5586 }
5587 
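/*
 * Each group of 4 consecutive H/W queue IDs belongs to a single QMAN, so
 * (hw_queue_id - <group base>) >> 2 yields the engine index that is used as
 * the capability-bit offset (e.g. GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2 maps to
 * EDMA engine 1 of dcore 0). TPC and NIC have dedicated capability words,
 * and a computed bit offset of 0 is handled explicitly since it cannot be
 * distinguished from "no capability bit was selected".
 */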
5588 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
5589 {
5590 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5591 	u64 hw_cap_mask = 0;
5592 	u64 hw_tpc_cap_bit = 0;
5593 	u64 hw_nic_cap_bit = 0;
5594 	u64 hw_test_cap_bit = 0;
5595 
5596 	switch (hw_queue_id) {
5597 	case GAUDI2_QUEUE_ID_PDMA_0_0:
5598 	case GAUDI2_QUEUE_ID_PDMA_0_1:
5599 	case GAUDI2_QUEUE_ID_PDMA_1_0:
5600 		hw_cap_mask = HW_CAP_PDMA_MASK;
5601 		break;
5602 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
5603 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
5604 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
5605 		break;
5606 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
5607 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
5608 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
5609 		break;
5610 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
5611 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
5612 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
5613 		break;
5614 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
5615 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
5616 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
5617 		break;
5618 
5619 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
5620 		hw_test_cap_bit = HW_CAP_MME_SHIFT;
5621 		break;
5622 
5623 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
5624 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
5625 		break;
5626 
5627 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
5628 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
5629 		break;
5630 
5631 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
5632 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
5633 		break;
5634 
5635 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
5636 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
5637 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
5638 
5639 		/* special case where cap bit refers to the first queue id */
5640 		if (!hw_tpc_cap_bit)
5641 			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
5642 		break;
5643 
5644 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
5645 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
5646 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
5647 		break;
5648 
5649 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
5650 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
5651 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
5652 		break;
5653 
5654 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
5655 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
5656 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
5657 		break;
5658 
5659 	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
5660 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
5661 		break;
5662 
5663 	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
5664 		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
5665 		break;
5666 
5667 	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
5668 		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
5669 
5670 		/* special case where cap bit refers to the first queue id */
5671 		if (!hw_nic_cap_bit)
5672 			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
5673 		break;
5674 
5675 	case GAUDI2_QUEUE_ID_CPU_PQ:
5676 		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
5677 
5678 	default:
5679 		return false;
5680 	}
5681 
5682 	if (hw_tpc_cap_bit)
5683 		return  !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
5684 
5685 	if (hw_nic_cap_bit)
5686 		return  !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
5687 
5688 	if (hw_test_cap_bit)
5689 		hw_cap_mask = BIT_ULL(hw_test_cap_bit);
5690 
5691 	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
5692 }
5693 
5694 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
5695 {
5696 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5697 
5698 	switch (arc_id) {
5699 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5700 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5701 		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
5702 
5703 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5704 		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5705 
5706 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5707 		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5708 
5709 	default:
5710 		return false;
5711 	}
5712 }
5713 
5714 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5715 {
5716 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5717 
5718 	switch (arc_id) {
5719 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5720 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5721 		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
5722 		break;
5723 
5724 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5725 		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5726 		break;
5727 
5728 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5729 		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5730 		break;
5731 
5732 	default:
5733 		return;
5734 	}
5735 }
5736 
5737 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5738 {
5739 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5740 
5741 	switch (arc_id) {
5742 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5743 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5744 		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
5745 		break;
5746 
5747 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5748 		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
5749 		break;
5750 
5751 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5752 		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
5753 		break;
5754 
5755 	default:
5756 		return;
5757 	}
5758 }
5759 
5760 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
5761 {
5762 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5763 	u32 pq_offset, reg_base, db_reg_offset, db_value;
5764 
5765 	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
5766 		/*
5767 		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
5768 		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
5769 		 * number.
5770 		 */
5771 		pq_offset = (hw_queue_id & 0x3) * 4;
5772 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
5773 		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
5774 	} else {
5775 		db_reg_offset = mmCPU_IF_PF_PQ_PI;
5776 	}
5777 
5778 	db_value = pi;
5779 
5780 	/* ring the doorbell */
5781 	WREG32(db_reg_offset, db_value);
5782 
5783 	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
5784 		/* make sure device CPU will read latest data from host */
5785 		mb();
5786 		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
5787 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
5788 	}
5789 }
5790 
5791 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
5792 {
5793 	__le64 *pbd = (__le64 *) bd;
5794 
5795 	/* The QMANs are on host memory so a simple copy suffices */
5796 	pqe[0] = pbd[0];
5797 	pqe[1] = pbd[1];
5798 }
5799 
5800 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
5801 				dma_addr_t *dma_handle, gfp_t flags)
5802 {
5803 	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
5804 }
5805 
5806 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
5807 				void *cpu_addr, dma_addr_t dma_handle)
5808 {
5809 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
5810 }
5811 
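/*
 * Note: when the CPU queue capability is not set, the call silently succeeds
 * and, if a result pointer was given, *result is set to 0.
 */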
5812 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
5813 				u32 timeout, u64 *result)
5814 {
5815 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5816 
5817 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
5818 		if (result)
5819 			*result = 0;
5820 		return 0;
5821 	}
5822 
5823 	if (!timeout)
5824 		timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
5825 
5826 	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
5827 }
5828 
5829 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5830 				gfp_t mem_flags, dma_addr_t *dma_handle)
5831 {
5832 	if (size > GAUDI2_DMA_POOL_BLK_SIZE)
5833 		return NULL;
5834 
5835 	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5836 }
5837 
5838 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
5839 {
5840 	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
5841 }
5842 
5843 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
5844 						dma_addr_t *dma_handle)
5845 {
5846 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5847 }
5848 
5849 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
5850 {
5851 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5852 }
5853 
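/* Map a host buffer for DMA; returns 0 (an invalid DMA address) on mapping failure */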
5854 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
5855 					enum dma_data_direction dir)
5856 {
5857 	dma_addr_t dma_addr;
5858 
5859 	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
5860 	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
5861 		return 0;
5862 
5863 	return dma_addr;
5864 }
5865 
5866 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
5867 					enum dma_data_direction dir)
5868 {
5869 	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
5870 }
5871 
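/*
 * A user-allocated CB is accepted only if it falls entirely within the SRAM
 * user range, the DRAM user range, the DMMU/PMMU virtual ranges (when the
 * relevant MMU is initialized), or, when the PMMU is not initialized, a valid
 * host physical address on a device that is not IOMMU-mapped.
 * Otherwise -EFAULT is returned.
 */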
5872 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
5873 {
5874 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5875 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5876 
5877 	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
5878 		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5879 		return -EINVAL;
5880 	}
5881 
5882 	/* Just check if CB address is valid */
5883 
5884 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5885 					parser->user_cb_size,
5886 					asic_prop->sram_user_base_address,
5887 					asic_prop->sram_end_address))
5888 		return 0;
5889 
5890 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5891 					parser->user_cb_size,
5892 					asic_prop->dram_user_base_address,
5893 					asic_prop->dram_end_address))
5894 		return 0;
5895 
5896 	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
5897 		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5898 						parser->user_cb_size,
5899 						asic_prop->dmmu.start_addr,
5900 						asic_prop->dmmu.end_addr))
5901 		return 0;
5902 
5903 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
5904 		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5905 					parser->user_cb_size,
5906 					asic_prop->pmmu.start_addr,
5907 					asic_prop->pmmu.end_addr) ||
5908 			hl_mem_area_inside_range(
5909 					(u64) (uintptr_t) parser->user_cb,
5910 					parser->user_cb_size,
5911 					asic_prop->pmmu_huge.start_addr,
5912 					asic_prop->pmmu_huge.end_addr))
5913 			return 0;
5914 
5915 	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
5916 		if (!hdev->pdev)
5917 			return 0;
5918 
5919 		if (!device_iommu_mapped(&hdev->pdev->dev))
5920 			return 0;
5921 	}
5922 
5923 	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
5924 		parser->user_cb, parser->user_cb_size);
5925 
5926 	return -EFAULT;
5927 }
5928 
5929 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5930 {
5931 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5932 
5933 	if (!parser->is_kernel_allocated_cb)
5934 		return gaudi2_validate_cb_address(hdev, parser);
5935 
5936 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5937 		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
5938 		return -EINVAL;
5939 	}
5940 
5941 	return 0;
5942 }
5943 
5944 static int gaudi2_send_heartbeat(struct hl_device *hdev)
5945 {
5946 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5947 
5948 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
5949 		return 0;
5950 
5951 	return hl_fw_send_heartbeat(hdev);
5952 }
5953 
5954 /* Internal helper to update the KDMA MMU properties (bypass and ASID).
5955  * Should be called while holding the KDMA lock.
5956  */
5957 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
5958 					   bool mmu_bypass, u32 asid)
5959 {
5960 	u32 rw_asid, rw_mmu_bp;
5961 
5962 	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
5963 		      (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
5964 
5965 	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
5966 			(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
5967 
5968 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
5969 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
5970 }
5971 
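/*
 * Reset the given SOB and arm a sync-manager monitor on it: once the SOB
 * reaches sync_value, the monitor writes mon_payload to completion queue
 * cq_id (CQ_EN mode). This is used below to signal KDMA completions.
 */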
5972 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
5973 						u32 mon_payload, u32 sync_value)
5974 {
5975 	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
5976 	u8 mask;
5977 
5978 	sob_offset = sob_id * 4;
5979 	mon_offset = mon_id * 4;
5980 
5981 	/* Reset the SOB value */
5982 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5983 
5984 	/* Configure this address with CQ_ID 0 because CQ_EN is set */
5985 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
5986 
5987 	/* Configure this address with CS index because CQ_EN is set */
5988 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
5989 
5990 	sync_group_id = sob_id / 8;
5991 	mask = ~(1 << (sob_id & 0x7));
5992 	mode = 1; /* comparison mode is "equal to" */
5993 
5994 	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
5995 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
5996 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
5997 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
5998 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
5999 }
6000 
6001 /* Internal helper: submit a single copy/memset job to the KDMA engine and poll for completion */
6002 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6003 					u64 src_addr, u64 dst_addr,
6004 					u32 size, bool is_memset)
6005 {
6006 	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6007 	struct hl_cq_entry *cq_base;
6008 	struct hl_cq *cq;
6009 	u64 comp_addr;
6010 	int rc;
6011 
6012 	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6013 				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6014 				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6015 
6016 	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6017 			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6018 
6019 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6020 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6021 
6022 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6023 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6024 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6025 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6026 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6027 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6028 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6029 	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6030 
6031 	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6032 				FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6033 
6034 	if (is_memset)
6035 		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6036 
6037 	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6038 
6039 	/* Wait for completion */
6040 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6041 	cq_base = cq->kernel_address;
6042 	polling_addr = (u32 *)&cq_base[cq->ci];
6043 
6044 	if (hdev->pldm)
6045 		/* on Palladium, allow 20 seconds of timeout for each 1MB of transfer */
6046 		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6047 	else
6048 		timeout = KDMA_TIMEOUT_USEC;
6049 
6050 	/* Polling */
6051 	rc = hl_poll_timeout_memory(
6052 			hdev,
6053 			polling_addr,
6054 			status,
6055 			(status == 1),
6056 			1000,
6057 			timeout,
6058 			true);
6059 
6060 	*polling_addr = 0;
6061 
6062 	if (rc) {
6063 		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6064 		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6065 		return rc;
6066 	}
6067 
6068 	cq->ci = hl_cq_inc_ptr(cq->ci);
6069 
6070 	return 0;
6071 }
6072 
6073 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6074 {
6075 	u32 i;
6076 
6077 	for (i = 0 ; i < size ; i += sizeof(u32))
6078 		WREG32(addr + i, val);
6079 }
6080 
6081 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6082 {
6083 	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6084 
6085 	if (enable) {
6086 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6087 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6088 	} else {
6089 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6090 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6091 	}
6092 }
6093 
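/*
 * Queue sanity test: submit a MSG_SHORT packet that writes a known value to
 * the first available user SOB, then poll the SOB until it changes. Success
 * proves the QMAN fetches and executes PQ entries for this queue.
 */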
6094 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
6095 {
6096 	u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
6097 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6098 	u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a;
6099 	struct packet_msg_short *msg_short_pkt;
6100 	dma_addr_t pkt_dma_addr;
6101 	size_t pkt_size;
6102 	int rc;
6103 
6104 	if (hdev->pldm)
6105 		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6106 	else
6107 		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6108 
6109 	pkt_size = sizeof(*msg_short_pkt);
6110 	msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr);
6111 	if (!msg_short_pkt) {
6112 		dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n",
6113 			hw_queue_id);
6114 		return -ENOMEM;
6115 	}
6116 
6117 	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6118 		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6119 		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6120 		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6121 		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6122 
6123 	msg_short_pkt->value = cpu_to_le32(sob_val);
6124 	msg_short_pkt->ctl = cpu_to_le32(tmp);
6125 
6126 	/* Reset the SOB value */
6127 	WREG32(sob_addr, 0);
6128 
6129 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
6130 	if (rc) {
6131 		dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n",
6132 			hw_queue_id);
6133 		goto free_pkt;
6134 	}
6135 
6136 	rc = hl_poll_timeout(
6137 			hdev,
6138 			sob_addr,
6139 			tmp,
6140 			(tmp == sob_val),
6141 			1000,
6142 			timeout_usec);
6143 
6144 	if (rc == -ETIMEDOUT) {
6145 		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6146 			hw_queue_id, tmp);
6147 		rc = -EIO;
6148 	}
6149 
6150 	/* Reset the SOB value */
6151 	WREG32(sob_addr, 0);
6152 
6153 free_pkt:
6154 	hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr);
6155 	return rc;
6156 }
6157 
6158 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6159 {
6160 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6161 
6162 	/*
6163 	 * Check the capability here because send_cpu_message() won't update the
6164 	 * result value when the CPU queue capability is missing.
6165 	 */
6166 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6167 		return 0;
6168 
6169 	return hl_fw_test_cpu_queue(hdev);
6170 }
6171 
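/*
 * Run the sanity test on every enabled H/W queue (with the QMAN temporarily
 * put into test mode) and then on the CPU queue.
 */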
6172 static int gaudi2_test_queues(struct hl_device *hdev)
6173 {
6174 	int i, rc, ret_val = 0;
6175 
6176 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6177 		if (!gaudi2_is_queue_enabled(hdev, i))
6178 			continue;
6179 
6180 		gaudi2_qman_set_test_mode(hdev, i, true);
6181 		rc = gaudi2_test_queue(hdev, i);
6182 		gaudi2_qman_set_test_mode(hdev, i, false);
6183 
6184 		if (rc) {
6185 			ret_val = -EINVAL;
6186 			goto done;
6187 		}
6188 	}
6189 
6190 	rc = gaudi2_test_cpu_queue(hdev);
6191 	if (rc) {
6192 		ret_val = -EINVAL;
6193 		goto done;
6194 	}
6195 
6196 done:
6197 	return ret_val;
6198 }
6199 
6200 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6201 {
6202 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6203 	size_t irq_arr_size;
6204 
6205 	/* TODO: missing gaudi2_nic_resume.
6206 	 * Until it is implemented, nic_hw_cap_initialized will remain zeroed
6207 	 */
6208 	gaudi2_init_arcs(hdev);
6209 	gaudi2_scrub_arcs_dccm(hdev);
6210 	gaudi2_init_security(hdev);
6211 
6212 	/* Unmask all IRQs since some could have been received during the soft reset */
6213 	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
6214 	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
6215 }
6216 
6217 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
6218 					struct iterate_module_ctx *ctx)
6219 {
6220 	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
6221 	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
6222 	bool is_eng_idle;
6223 	int engine_idx;
6224 
6225 	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
6226 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
6227 	else
6228 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
6229 				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
6230 
6231 	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
6232 	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
6233 	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
6234 	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
6235 
6236 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6237 						IS_TPC_IDLE(tpc_cfg_sts);
6238 	*(idle_data->is_idle) &= is_eng_idle;
6239 
6240 	if (idle_data->mask && !is_eng_idle)
6241 		set_bit(engine_idx, idle_data->mask);
6242 
6243 	if (idle_data->e)
6244 		hl_engine_data_sprintf(idle_data->e,
6245 					idle_data->tpc_fmt, dcore, inst,
6246 					is_eng_idle ? "Y" : "N",
6247 					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6248 }
6249 
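/*
 * Check idle status of all compute/DMA engines: EDMA, PDMA, NIC, MME, TPC,
 * decoders and rotators. Optionally fills a per-engine busy bitmask and a
 * human-readable report in @e. Returns true only if every engine is idle.
 */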
6250 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
6251 					struct engines_data *e)
6252 {
6253 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask,
6254 		mme_arch_sts, dec_swreg15, dec_enabled_bit;
6255 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6256 	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n";
6257 	unsigned long *mask = (unsigned long *) mask_arr;
6258 	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n";
6259 	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
6260 	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
6261 	const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n";
6262 	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
6263 	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
6264 	bool is_idle = true, is_eng_idle;
6265 	u64 offset;
6266 
6267 	struct gaudi2_tpc_idle_data tpc_idle_data = {
6268 		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
6269 		.e = e,
6270 		.mask = mask,
6271 		.is_idle = &is_idle,
6272 	};
6273 	struct iterate_module_ctx tpc_iter = {
6274 		.fn = &gaudi2_is_tpc_engine_idle,
6275 		.data = &tpc_idle_data,
6276 	};
6277 
6278 	int engine_idx, i, j;
6279 
6280 	/* EDMA, Two engines per Dcore */
6281 	if (e)
6282 		hl_engine_data_sprintf(e,
6283 			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6284 			"----  ----  -------  ------------  ----------------------\n");
6285 
6286 	for (i = 0; i < NUM_OF_DCORES; i++) {
6287 		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
6288 			int seq = i * NUM_OF_EDMA_PER_DCORE + j;
6289 
6290 			if (!(prop->edma_enabled_mask & BIT(seq)))
6291 				continue;
6292 
6293 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
6294 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6295 			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
6296 
6297 			dma_core_idle_ind_mask =
6298 			RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset);
6299 
6300 			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
6301 			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
6302 			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
6303 
6304 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6305 					IS_DMA_IDLE(dma_core_idle_ind_mask);
6306 			is_idle &= is_eng_idle;
6307 
6308 			if (mask && !is_eng_idle)
6309 				set_bit(engine_idx, mask);
6310 
6311 			if (e)
6312 				hl_engine_data_sprintf(e, edma_fmt, i, j,
6313 							is_eng_idle ? "Y" : "N",
6314 							qm_glbl_sts0,
6315 							dma_core_idle_ind_mask);
6316 		}
6317 	}
6318 
6319 	/* PDMA, Two engines in Full chip */
6320 	if (e)
6321 		hl_engine_data_sprintf(e,
6322 					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6323 					"----  -------  ------------  ----------------------\n");
6324 
6325 	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
6326 		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
6327 		offset = i * PDMA_OFFSET;
6328 		dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset);
6329 
6330 		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
6331 		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
6332 		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
6333 
6334 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6335 				IS_DMA_IDLE(dma_core_idle_ind_mask);
6336 		is_idle &= is_eng_idle;
6337 
6338 		if (mask && !is_eng_idle)
6339 			set_bit(engine_idx, mask);
6340 
6341 		if (e)
6342 			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
6343 						qm_glbl_sts0, dma_core_idle_ind_mask);
6344 	}
6345 
6346 	/* NIC, twelve macros in Full chip */
6347 	if (e && hdev->nic_ports_mask)
6348 		hl_engine_data_sprintf(e,
6349 					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
6350 					"---  -------  ------------  ----------\n");
6351 
6352 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6353 		if (!(i & 1))
6354 			offset = i / 2 * NIC_OFFSET;
6355 		else
6356 			offset += NIC_QM_OFFSET;
6357 
6358 		if (!(hdev->nic_ports_mask & BIT(i)))
6359 			continue;
6360 
6361 		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
6362 
6363 
6364 		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
6365 		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
6366 		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
6367 
6368 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6369 		is_idle &= is_eng_idle;
6370 
6371 		if (mask && !is_eng_idle)
6372 			set_bit(engine_idx, mask);
6373 
6374 		if (e)
6375 			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
6376 						qm_glbl_sts0, qm_cgm_sts);
6377 	}
6378 
6379 	if (e)
6380 		hl_engine_data_sprintf(e,
6381 					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
6382 					"---  ----  -------  ------------  ---------------\n");
6383 	/* MME, one per Dcore */
6384 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6385 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
6386 		offset = i * DCORE_OFFSET;
6387 
6388 		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
6389 		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
6390 		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
6391 
6392 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6393 		is_idle &= is_eng_idle;
6394 
6395 		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
6396 		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
6397 		is_idle &= is_eng_idle;
6398 
6399 		if (e)
6400 			hl_engine_data_sprintf(e, mme_fmt, i, "N",
6401 				is_eng_idle ? "Y" : "N",
6402 				qm_glbl_sts0,
6403 				mme_arch_sts);
6404 
6405 		if (mask && !is_eng_idle)
6406 			set_bit(engine_idx, mask);
6407 	}
6408 
6409 	/*
6410 	 * TPC
6411 	 */
6412 	if (e && prop->tpc_enabled_mask)
6413 		hl_engine_data_sprintf(e,
6414 			"\nCORE  TPC   is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_IDLE_IND_MASK\n"
6415 			"----  ---  --------  ------------  ----------  ----------------------\n");
6416 
6417 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
6418 
6419 	/* Decoders, two per Dcore and two shared PCIe decoders */
6420 	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
6421 		hl_engine_data_sprintf(e,
6422 			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
6423 			"----  ---  -------  ---------------\n");
6424 
6425 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6426 		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
6427 			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
6428 			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
6429 				continue;
6430 
6431 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
6432 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6433 			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
6434 
6435 			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
6436 			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6437 			is_idle &= is_eng_idle;
6438 
6439 			if (mask && !is_eng_idle)
6440 				set_bit(engine_idx, mask);
6441 
6442 			if (e)
6443 				hl_engine_data_sprintf(e, dec_fmt, i, j,
6444 							is_eng_idle ? "Y" : "N", dec_swreg15);
6445 		}
6446 	}
6447 
6448 	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
6449 		hl_engine_data_sprintf(e,
6450 			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
6451 			"--------  -------  ---------------\n");
6452 
6453 	/* Check shared(PCIe) decoders */
6454 	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
6455 		dec_enabled_bit = PCIE_DEC_SHIFT + i;
6456 		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
6457 			continue;
6458 
6459 		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
6460 		offset = i * DCORE_DEC_OFFSET;
6461 		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
6462 		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6463 		is_idle &= is_eng_idle;
6464 
6465 		if (mask && !is_eng_idle)
6466 			set_bit(engine_idx, mask);
6467 
6468 		if (e)
6469 			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
6470 						is_eng_idle ? "Y" : "N", dec_swreg15);
6471 	}
6472 
6473 	if (e)
6474 		hl_engine_data_sprintf(e,
6475 			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6476 			"----  ----  -------  ------------  ----------  -------------\n");
6477 
6478 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
6479 		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
6480 
6481 		offset = i * ROT_OFFSET;
6482 
6483 		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
6484 		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
6485 		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
6486 
6487 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6488 		is_idle &= is_eng_idle;
6489 
6490 		if (mask && !is_eng_idle)
6491 			set_bit(engine_idx, mask);
6492 
6493 		if (e)
6494 			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
6495 					qm_glbl_sts0, qm_cgm_sts, "-");
6496 	}
6497 
6498 	return is_idle;
6499 }
6500 
6501 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
6502 	__acquires(&gaudi2->hw_queues_lock)
6503 {
6504 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6505 
6506 	spin_lock(&gaudi2->hw_queues_lock);
6507 }
6508 
6509 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
6510 	__releases(&gaudi2->hw_queues_lock)
6511 {
6512 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6513 
6514 	spin_unlock(&gaudi2->hw_queues_lock);
6515 }
6516 
6517 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
6518 {
6519 	return hdev->pdev->device;
6520 }
6521 
6522 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
6523 {
6524 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6525 
6526 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6527 		return 0;
6528 
6529 	return hl_fw_get_eeprom_data(hdev, data, max_size);
6530 }
6531 
6532 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
6533 {
6534 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
6535 }
6536 
6537 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
6538 {
6539 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6540 
6541 	if (aggregate) {
6542 		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
6543 		return gaudi2->events_stat_aggregate;
6544 	}
6545 
6546 	*size = (u32) sizeof(gaudi2->events_stat);
6547 	return gaudi2->events_stat;
6548 }
6549 
6550 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
6551 				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6552 {
6553 	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
6554 			dcore_vdec_id + DCORE_OFFSET * dcore_id;
6555 
6556 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6557 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6558 
6559 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6560 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6561 
6562 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6563 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6564 
6565 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6566 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6567 
6568 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6569 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6570 }
6571 
6572 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
6573 {
6574 	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6575 			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6576 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6577 	u32 dcore_offset = dcore_id * DCORE_OFFSET;
6578 	u32 vdec_id, i, ports_offset, reg_val;
6579 	u8 edma_seq_base;
6580 
6581 	/* EDMA */
6582 	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
6583 	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
6584 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6585 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6586 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6587 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6588 	}
6589 
6590 	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
6591 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6592 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6593 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6594 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6595 	}
6596 
6597 	/* Sync Mngr */
6598 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
6599 	/*
6600 	 * Sync Mngrs on dcores 1 - 3 are exposed to the user, so the user ASID
6601 	 * must be used for any access type
6602 	 */
6603 	if (dcore_id > 0) {
6604 		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
6605 			  (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
6606 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
6607 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
6608 	}
6609 
6610 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
6611 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
6612 
6613 	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
6614 		ports_offset = i * DCORE_MME_SBTE_OFFSET;
6615 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
6616 				dcore_offset + ports_offset, 0);
6617 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
6618 				dcore_offset + ports_offset, rw_asid);
6619 	}
6620 
6621 	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
6622 		ports_offset = i * DCORE_MME_WB_OFFSET;
6623 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
6624 				dcore_offset + ports_offset, 0);
6625 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
6626 				dcore_offset + ports_offset, rw_asid);
6627 	}
6628 
6629 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6630 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6631 
6632 	/*
6633 	 * Decoders
6634 	 */
6635 	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
6636 		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
6637 			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
6638 	}
6639 }
6640 
6641 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
6642 				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6643 {
6644 	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
6645 
6646 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6647 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6648 
6649 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6650 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6651 
6652 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6653 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6654 
6655 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6656 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6657 
6658 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6659 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6660 }
6661 
6662 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
6663 							u32 rw_asid, u32 rw_mmu_bp)
6664 {
6665 	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
6666 
6667 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
6668 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
6669 }
6670 
6671 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
6672 {
6673 	u32 reg_base, reg_offset, reg_val = 0;
6674 
6675 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
6676 
6677 	/* Enable MMU and configure asid for all relevant ARC regions */
6678 	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
6679 	reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
6680 
6681 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
6682 	WREG32(reg_base + reg_offset, reg_val);
6683 
6684 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
6685 	WREG32(reg_base + reg_offset, reg_val);
6686 
6687 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
6688 	WREG32(reg_base + reg_offset, reg_val);
6689 
6690 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
6691 	WREG32(reg_base + reg_offset, reg_val);
6692 
6693 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
6694 	WREG32(reg_base + reg_offset, reg_val);
6695 
6696 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
6697 	WREG32(reg_base + reg_offset, reg_val);
6698 
6699 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
6700 	WREG32(reg_base + reg_offset, reg_val);
6701 
6702 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
6703 	WREG32(reg_base + reg_offset, reg_val);
6704 
6705 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
6706 	WREG32(reg_base + reg_offset, reg_val);
6707 
6708 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
6709 	WREG32(reg_base + reg_offset, reg_val);
6710 
6711 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
6712 	WREG32(reg_base + reg_offset, reg_val);
6713 }
6714 
6715 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
6716 {
6717 	int i;
6718 
6719 	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
6720 		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
6721 
6722 	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6723 		gaudi2_arc_mmu_prepare(hdev, i, asid);
6724 
6725 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
6726 		if (!gaudi2_is_queue_enabled(hdev, i))
6727 			continue;
6728 
6729 		gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
6730 	}
6731 
6732 	return 0;
6733 }
6734 
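/*
 * Configure ASID and MMU-bypass for blocks that are not per-Dcore: the two
 * PDMAs, the rotators, the shared PCIe decoders, the ARC farm DUP engines
 * and the ARC cores themselves.
 */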
6735 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
6736 {
6737 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6738 	u32 rw_asid, offset;
6739 	int rc, i;
6740 
6741 	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
6742 			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
6743 
6744 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6745 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6746 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6747 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6748 
6749 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6750 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6751 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6752 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6753 
6754 	/* ROT */
6755 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
6756 		offset = i * ROT_OFFSET;
6757 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
6758 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6759 		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
6760 		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
6761 		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
6762 	}
6763 
6764 	/* Shared Decoders are the last bits in the decoders mask */
6765 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
6766 		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
6767 
6768 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
6769 		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
6770 
6771 	/* arc farm arc dup eng */
6772 	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6773 		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
6774 
6775 	rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
6776 	if (rc)
6777 		return rc;
6778 
6779 	return 0;
6780 }
6781 
6782 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst,	u32 offset,
6783 					struct iterate_module_ctx *ctx)
6784 {
6785 	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
6786 
6787 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
6788 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
6789 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6790 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
6791 }
6792 
6793 /* zero the MMUBP and set the ASID */
6794 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
6795 {
6796 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6797 	struct gaudi2_tpc_mmu_data tpc_mmu_data;
6798 	struct iterate_module_ctx tpc_iter = {
6799 		.fn = &gaudi2_tpc_mmu_prepare,
6800 		.data = &tpc_mmu_data,
6801 	};
6802 	int rc, i;
6803 
6804 	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
6805 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6806 		return -EINVAL;
6807 	}
6808 
6809 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
6810 		return 0;
6811 
6812 	rc = gaudi2_mmu_shared_prepare(hdev, asid);
6813 	if (rc)
6814 		return rc;
6815 
6816 	/* configure DCORE MMUs */
6817 	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6818 				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6819 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
6820 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
6821 		gaudi2_mmu_dcore_prepare(hdev, i, asid);
6822 
6823 	return 0;
6824 }
6825 
6826 static inline bool is_info_event(u32 event)
6827 {
6828 	switch (event) {
6829 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
6830 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
6831 
6832 	/* return in case of NIC status event - these events are received periodically and are not
6833 	 * an indication of an error.
6834 	 */
6835 	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
6836 		return true;
6837 	default:
6838 		return false;
6839 	}
6840 }
6841 
6842 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
6843 			bool ratelimited, const char *fmt, ...)
6844 {
6845 	struct va_format vaf;
6846 	va_list args;
6847 
6848 	va_start(args, fmt);
6849 	vaf.fmt = fmt;
6850 	vaf.va = &args;
6851 
6852 	if (ratelimited)
6853 		dev_err_ratelimited(hdev->dev, "%s: %pV\n",
6854 			gaudi2_irq_map_table[event_type].valid ?
6855 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
6856 	else
6857 		dev_err(hdev->dev, "%s: %pV\n",
6858 			gaudi2_irq_map_table[event_type].valid ?
6859 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
6860 
6861 	va_end(args);
6862 }
6863 
6864 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6865 		struct hl_eq_ecc_data *ecc_data)
6866 {
6867 	u64 ecc_address = 0, ecc_syndrom = 0;
6868 	u8 memory_wrapper_idx = 0;
6869 
6870 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
6871 	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6872 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6873 
6874 	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
6875 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u. critical %u.\n",
6876 		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
6877 
6878 	return !!ecc_data->is_critical;
6879 }
6880 
6881 /*
6882  * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6883  *
6884  * @idx: the current pi/ci value
6885  * @q_len: the queue length (power of 2)
6886  *
6887  * @return the cyclically decremented index
6888  */
6889 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
6890 {
6891 	u32 mask = q_len - 1;
6892 
6893 	/*
6894 	 * A modular decrement is equivalent to adding (queue length - 1);
6895 	 * masking with the LSBs then keeps the value in the range
6896 	 * [0, queue_len - 1]. For example, with q_len = 8: idx 0 -> 7, idx 3 -> 2.
6897 	 */
6898 	return (idx + q_len - 1) & mask;
6899 }
6900 
6901 /**
6902  * gaudi2_print_sw_config_stream_data - print SW config stream data
6903  *
6904  * @hdev: pointer to the habanalabs device structure
6905  * @stream: the QMAN's stream
6906  * @qman_base: base address of QMAN registers block
6907  */
6908 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
6909 						u32 stream, u64 qman_base)
6910 {
6911 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6912 	u32 cq_ptr_lo_off, size;
6913 
6914 	cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
6915 
6916 	cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
6917 									stream * cq_ptr_lo_off;
6918 
6919 	cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6920 
6921 	cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6922 
6923 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6924 	size = RREG32(cq_tsize);
6925 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
6926 		stream, cq_ptr, size);
6927 }
6928 
6929 /**
6930  * gaudi2_print_last_pqes_on_err - print last PQEs on error
6931  *
6932  * @hdev: pointer to the habanalabs device structure
6933  * @qid_base: first QID of the QMAN (out of 4 streams)
6934  * @stream: the QMAN's stream
6935  * @qman_base: base address of QMAN registers block
6936  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6937  */
6938 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
6939 						u64 qman_base, bool pr_sw_conf)
6940 {
6941 	u32 ci, qm_ci_stream_off;
6942 	struct hl_hw_queue *q;
6943 	u64 pq_ci;
6944 	int i;
6945 
6946 	q = &hdev->kernel_queues[qid_base + stream];
6947 
6948 	qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
6949 	pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
6950 						stream * qm_ci_stream_off;
6951 
6952 	hdev->asic_funcs->hw_queues_lock(hdev);
6953 
6954 	if (pr_sw_conf)
6955 		gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
6956 
6957 	ci = RREG32(pq_ci);
6958 
6959 	/* we should start printing from ci - 1 */
6960 	ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6961 
6962 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6963 		struct hl_bd *bd;
6964 		u64 addr;
6965 		u32 len;
6966 
6967 		bd = q->kernel_address;
6968 		bd += ci;
6969 
6970 		len = le32_to_cpu(bd->len);
6971 		/* len 0 means an uninitialized entry - break */
6972 		if (!len)
6973 			break;
6974 
6975 		addr = le64_to_cpu(bd->ptr);
6976 
6977 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
6978 			stream, ci, addr, len);
6979 
6980 		/* get previous ci, wrap if needed */
6981 		ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6982 	}
6983 
6984 	hdev->asic_funcs->hw_queues_unlock(hdev);
6985 }
6986 
6987 /**
6988  * print_qman_data_on_err - extract QMAN data on error
6989  *
6990  * @hdev: pointer to the habanalabs device structure
6991  * @qid_base: first QID of the QMAN (out of 4 streams)
6992  * @stream: the QMAN's stream
6993  * @qman_base: base address of QMAN registers block
6994  *
6995  * This function attempts to extract as much data as possible on a QMAN error.
6996  * For an upper CP, print the SW config stream data and the last 8 PQEs.
6997  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
6998  */
6999 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
7000 {
7001 	u32 i;
7002 
7003 	if (stream != QMAN_STREAMS) {
7004 		gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
7005 		return;
7006 	}
7007 
7008 	gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7009 
7010 	for (i = 0 ; i < QMAN_STREAMS ; i++)
7011 		gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
7012 }
7013 
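/*
 * Decode a QMAN error: scan GLBL_ERR_STS of the four streams and the lower
 * CP, print each set error-cause bit, dump the relevant PQ data and then
 * decode the ARB error cause register. Returns the number of errors found.
 */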
7014 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7015 							u64 qman_base, u32 qid_base)
7016 {
7017 	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7018 	u64 glbl_sts_addr, arb_err_addr;
7019 	char reg_desc[32];
7020 
7021 	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7022 	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7023 
7024 	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7025 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7026 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7027 
7028 		if (!glbl_sts_val)
7029 			continue;
7030 
7031 		if (i == QMAN_STREAMS) {
7032 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7033 			num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7034 		} else {
7035 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7036 			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7037 		}
7038 
7039 		for (j = 0 ; j < num_error_causes ; j++)
7040 			if (glbl_sts_val & BIT(j)) {
7041 				gaudi2_print_event(hdev, event_type, true,
7042 					"%s. err cause: %s", reg_desc,
7043 					i == QMAN_STREAMS ?
7044 					gaudi2_qman_lower_cp_error_cause[j] :
7045 					gaudi2_qman_error_cause[j]);
7046 				error_count++;
7047 			}
7048 
7049 		print_qman_data_on_err(hdev, qid_base, i, qman_base);
7050 	}
7051 
7052 	arb_err_val = RREG32(arb_err_addr);
7053 
7054 	if (!arb_err_val)
7055 		goto out;
7056 
7057 	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7058 		if (arb_err_val & BIT(j)) {
7059 			gaudi2_print_event(hdev, event_type, true,
7060 				"ARB_ERR. err cause: %s",
7061 				gaudi2_qman_arb_error_cause[j]);
7062 			error_count++;
7063 		}
7064 	}
7065 
7066 out:
7067 	return error_count;
7068 }
7069 
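/*
 * Report a shared range-register HBW RAZWI: the captured address and
 * initiator coordinates are taken either from the router registers or from
 * the FW-provided razwi_info, then registered via hl_handle_razwi().
 */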
7070 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7071 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7072 			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
7073 			enum gaudi2_engine_id id, u64 *event_mask)
7074 {
7075 	u32 razwi_hi, razwi_lo, razwi_xy;
7076 	u16 eng_id = id;
7077 	u8 rd_wr_flag;
7078 
7079 	if (is_write) {
7080 		if (read_razwi_regs) {
7081 			razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7082 			razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7083 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7084 		} else {
7085 			razwi_hi = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_hi_reg);
7086 			razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg);
7087 			razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg);
7088 		}
7089 		rd_wr_flag = HL_RAZWI_WRITE;
7090 	} else {
7091 		if (read_razwi_regs) {
7092 			razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7093 			razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7094 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7095 		} else {
7096 			razwi_hi = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_hi_reg);
7097 			razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg);
7098 			razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg);
7099 		}
7100 		rd_wr_flag = HL_RAZWI_READ;
7101 	}
7102 
7103 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7104 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7105 
7106 	dev_err_ratelimited(hdev->dev,
7107 		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7108 		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7109 }
7110 
7111 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7112 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7113 			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
7114 			enum gaudi2_engine_id id, u64 *event_mask)
7115 {
7116 	u32 razwi_addr, razwi_xy;
7117 	u16 eng_id = id;
7118 	u8 rd_wr_flag;
7119 
7120 	if (is_write) {
7121 		if (read_razwi_regs) {
7122 			razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7123 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7124 		} else {
7125 			razwi_addr = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_reg);
7126 			razwi_xy = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_id_reg);
7127 		}
7128 
7129 		rd_wr_flag = HL_RAZWI_WRITE;
7130 	} else {
7131 		if (read_razwi_regs) {
7132 			razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7133 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7134 		} else {
7135 			razwi_addr = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_reg);
7136 			razwi_xy = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_id_reg);
7137 		}
7138 
7139 		rd_wr_flag = HL_RAZWI_READ;
7140 	}
7141 
7142 	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7143 	dev_err_ratelimited(hdev->dev,
7144 				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n",
7145 				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7146 						razwi_xy);
7147 }
7148 
7149 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7150 						enum razwi_event_sources module, u8 module_idx)
7151 {
7152 	switch (module) {
7153 	case RAZWI_TPC:
7154 		if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7155 			return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7156 		return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7157 				(module_idx % NUM_OF_TPC_PER_DCORE) +
7158 				(GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7159 
7160 	case RAZWI_MME:
7161 		return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
7162 			(module_idx * ENGINE_ID_DCORE_OFFSET));
7163 
7164 	case RAZWI_EDMA:
7165 		return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7166 			(module_idx % NUM_OF_EDMA_PER_DCORE));
7167 
7168 	case RAZWI_PDMA:
7169 		return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
7170 
7171 	case RAZWI_NIC:
7172 		return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
7173 
7174 	case RAZWI_DEC:
7175 		if (module_idx == 8)
7176 			return GAUDI2_PCIE_ENGINE_ID_DEC_0;
7177 
7178 		if (module_idx == 9)
7179 			return GAUDI2_PCIE_ENGINE_ID_DEC_1;
7180
7181 		return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7182 				(module_idx % NUM_OF_DEC_PER_DCORE) +
7183 				(GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7184 
7185 	case RAZWI_ROT:
7186 		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
7187 
7188 	default:
7189 		return GAUDI2_ENGINE_ID_SIZE;
7190 	}
7191 }
7192 
7193 /*
7194  * This function handles RR (Range Register) hit events raised by
7195  * initiators, as opposed to PSOC RAZWI events.
7196  */
7197 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
7198 				enum razwi_event_sources module, u8 module_idx,
7199 				u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info,
7200 				u64 *event_mask)
7201 {
7202 	bool via_sft = false, read_razwi_regs = false;
7203 	u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id;
7204 	u64 rtr_mstr_if_base_addr;
7205 	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
7206 	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
7207 	char initiator_name[64];
7208 
7209 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX) || !razwi_info)
7210 		read_razwi_regs = true;
7211 
7212 	switch (module) {
7213 	case RAZWI_TPC:
7214 		rtr_id = gaudi2_tpc_initiator_rtr_id[module_idx];
7215 		sprintf(initiator_name, "TPC_%u", module_idx);
7216 		break;
7217 	case RAZWI_MME:
7218 		sprintf(initiator_name, "MME_%u", module_idx);
7219 		switch (module_sub_idx) {
7220 		case MME_WAP0:
7221 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
7222 			break;
7223 		case MME_WAP1:
7224 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
7225 			break;
7226 		case MME_WRITE:
7227 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
7228 			break;
7229 		case MME_READ:
7230 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
7231 			break;
7232 		case MME_SBTE0:
7233 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
7234 			break;
7235 		case MME_SBTE1:
7236 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
7237 			break;
7238 		case MME_SBTE2:
7239 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
7240 			break;
7241 		case MME_SBTE3:
7242 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
7243 			break;
7244 		case MME_SBTE4:
7245 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
7246 			break;
7247 		default:
7248 			return;
7249 		}
7250 		break;
7251 	case RAZWI_EDMA:
7252 		sft_id = gaudi2_edma_initiator_sft_id[module_idx].interface_id;
7253 		dcore_id = gaudi2_edma_initiator_sft_id[module_idx].dcore_id;
7254 		via_sft = true;
7255 		sprintf(initiator_name, "EDMA_%u", module_idx);
7256 		break;
7257 	case RAZWI_PDMA:
7258 		rtr_id = gaudi2_pdma_initiator_rtr_id[module_idx];
7259 		sprintf(initiator_name, "PDMA_%u", module_idx);
7260 		break;
7261 	case RAZWI_NIC:
7262 		rtr_id = gaudi2_nic_initiator_rtr_id[module_idx];
7263 		sprintf(initiator_name, "NIC_%u", module_idx);
7264 		break;
7265 	case RAZWI_DEC:
7266 		rtr_id = gaudi2_dec_initiator_rtr_id[module_idx];
7267 		sprintf(initiator_name, "DEC_%u", module_idx);
7268 		break;
7269 	case RAZWI_ROT:
7270 		rtr_id = gaudi2_rot_initiator_rtr_id[module_idx];
7271 		sprintf(initiator_name, "ROT_%u", module_idx);
7272 		break;
7273 	default:
7274 		return;
7275 	}
7276 
7277 	if (!read_razwi_regs) {
7278 		if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_HBW) {
7279 			hbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7280 								RAZWI_HAPPENED_AW;
7281 			hbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7282 								RAZWI_HAPPENED_AR;
7283 		} else if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_LBW) {
7284 			lbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7285 								RAZWI_HAPPENED_AW;
7286 			lbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7287 								RAZWI_HAPPENED_AR;
7288 		}
7289 		rtr_mstr_if_base_addr = 0;
7290 
7291 		goto dump_info;
7292 	}
7293 
7294 	/* Find router mstr_if register base */
7295 	if (via_sft) {
7296 		rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE +
7297 				dcore_id * SFT_DCORE_OFFSET +
7298 				sft_id * SFT_IF_OFFSET +
7299 				RTR_MSTR_IF_OFFSET;
7300 	} else {
7301 		dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7302 		dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7303 		rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
7304 				dcore_id * DCORE_OFFSET +
7305 				dcore_rtr_id * DCORE_RTR_OFFSET +
7306 				RTR_MSTR_IF_OFFSET;
7307 	}
7308 
7309 	/* Find out event cause by reading "RAZWI_HAPPENED" registers */
7310 	hbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
7311 
7312 	hbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
7313 
7314 	if (via_sft) {
7315 		/* SFT has a separate MSTR_IF for LBW; only there can we
7316 		 * read the LBW RAZWI related registers
7317 		 */
7318 		u64 base;
7319 
7320 		base = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + dcore_id * SFT_DCORE_OFFSET +
7321 				RTR_LBW_MSTR_IF_OFFSET;
7322 
7323 		lbw_shrd_aw = RREG32(base + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7324 
7325 		lbw_shrd_ar = RREG32(base + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7326 	} else {
7327 		lbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7328 
7329 		lbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7330 	}
7331 
7332 dump_info:
7333 	/* check if there is no RR razwi indication at all */
7334 	if (!hbw_shrd_aw && !hbw_shrd_ar && !lbw_shrd_aw && !lbw_shrd_ar)
7335 		return;
7336 
7337 	eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
7338 	if (hbw_shrd_aw) {
7339 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7340 						initiator_name, read_razwi_regs, razwi_info,
7341 						eng_id, event_mask);
7342 
7343 		/* Clear event indication */
7344 		if (read_razwi_regs)
7345 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
7346 	}
7347 
7348 	if (hbw_shrd_ar) {
7349 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7350 						initiator_name, read_razwi_regs, razwi_info,
7351 						eng_id, event_mask);
7352 
7353 		/* Clear event indication */
7354 		if (read_razwi_regs)
7355 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
7356 	}
7357 
7358 	if (lbw_shrd_aw) {
7359 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7360 						initiator_name, read_razwi_regs, razwi_info,
7361 						eng_id, event_mask);
7362 
7363 		/* Clear event indication */
7364 		if (read_razwi_regs)
7365 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
7366 	}
7367 
7368 	if (lbw_shrd_ar) {
7369 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7370 						initiator_name, read_razwi_regs, razwi_info,
7371 						eng_id, event_mask);
7372 
7373 		/* Clear event indication */
7374 		if (read_razwi_regs)
7375 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
7376 	}
7377 }
7378 
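/*
 * Scan every potentially enabled initiator (TPC, MME, EDMA, PDMA, NIC, DEC,
 * ROT) and let the per-module handler report and clear any pending RAZWI
 * indication left in the routers.
 */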
7379 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
7380 {
7381 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7382 	u8 mod_idx, sub_mod;
7383 
7384 	/* check all TPCs */
7385 	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
7386 		if (prop->tpc_enabled_mask & BIT(mod_idx))
7387 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL,
7388 								NULL);
7389 	}
7390 
7391 	/* check all MMEs */
7392 	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7393 		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
7394 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
7395 									sub_mod, NULL, NULL);
7396 
7397 	/* check all EDMAs */
7398 	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7399 		if (prop->edma_enabled_mask & BIT(mod_idx))
7400 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL,
7401 								NULL);
7402 
7403 	/* check all PDMAs */
7404 	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
7405 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL,
7406 							NULL);
7407 
7408 	/* check all NICs */
7409 	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
7410 		if (hdev->nic_ports_mask & BIT(mod_idx))
7411 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
7412 								NULL, NULL);
7413 
7414 	/* check all DECs */
7415 	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
7416 		if (prop->decoder_enabled_mask & BIT(mod_idx))
7417 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL,
7418 								NULL);
7419 
7420 	/* check all ROTs */
7421 	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
7422 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL, NULL);
7423 }
7424 
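/*
 * Return a human-readable list of the initiators that sit behind the given
 * router (RTR) id, for use in RAZWI error messages.
 */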
7425 static const char *gaudi2_get_initiators_name(u32 rtr_id)
7426 {
7427 	switch (rtr_id) {
7428 	case DCORE0_RTR0:
7429 		return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU";
7430 	case DCORE0_RTR1:
7431 		return "TPC0/1";
7432 	case DCORE0_RTR2:
7433 		return "TPC2/3";
7434 	case DCORE0_RTR3:
7435 		return "TPC4/5";
7436 	case DCORE0_RTR4:
7437 		return "MME0_SBTE0/1";
7438 	case DCORE0_RTR5:
7439 		return "MME0_WAP0/SBTE2";
7440 	case DCORE0_RTR6:
7441 		return "MME0_CTRL_WR/SBTE3";
7442 	case DCORE0_RTR7:
7443 		return "MME0_WAP1/CTRL_RD/SBTE4";
7444 	case DCORE1_RTR0:
7445 		return "MME1_WAP1/CTRL_RD/SBTE4";
7446 	case DCORE1_RTR1:
7447 		return "MME1_CTRL_WR/SBTE3";
7448 	case DCORE1_RTR2:
7449 		return "MME1_WAP0/SBTE2";
7450 	case DCORE1_RTR3:
7451 		return "MME1_SBTE0/1";
7452 	case DCORE1_RTR4:
7453 		return "TPC10/11";
7454 	case DCORE1_RTR5:
7455 		return "TPC8/9";
7456 	case DCORE1_RTR6:
7457 		return "TPC6/7";
7458 	case DCORE1_RTR7:
7459 		return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7";
7460 	case DCORE2_RTR0:
7461 		return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0";
7462 	case DCORE2_RTR1:
7463 		return "TPC16/17";
7464 	case DCORE2_RTR2:
7465 		return "TPC14/15";
7466 	case DCORE2_RTR3:
7467 		return "TPC12/13";
7468 	case DCORE2_RTR4:
7469 		return "MME2_SBTE0/1";
7470 	case DCORE2_RTR5:
7471 		return "MME2_WAP0/SBTE2";
7472 	case DCORE2_RTR6:
7473 		return "MME2_CTRL_WR/SBTE3";
7474 	case DCORE2_RTR7:
7475 		return "MME2_WAP1/CTRL_RD/SBTE4";
7476 	case DCORE3_RTR0:
7477 		return "MME3_WAP1/CTRL_RD/SBTE4";
7478 	case DCORE3_RTR1:
7479 		return "MME3_CTRL_WR/SBTE3";
7480 	case DCORE3_RTR2:
7481 		return "MME3_WAP0/SBTE2";
7482 	case DCORE3_RTR3:
7483 		return "MME3_SBTE0/1";
7484 	case DCORE3_RTR4:
7485 		return "TPC18/19";
7486 	case DCORE3_RTR5:
7487 		return "TPC20/21";
7488 	case DCORE3_RTR6:
7489 		return "TPC22/23";
7490 	case DCORE3_RTR7:
7491 		return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC";
7492 	default:
7493 		return "N/A";
7494 	}
7495 }
7496 
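/*
 * Fill @engines with the IDs of the engines that can initiate transactions
 * through the given router and return the number of entries written.
 */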
7497 static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines)
7498 {
7499 	switch (rtr_id) {
7500 	case DCORE0_RTR0:
7501 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0;
7502 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1;
7503 		engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0;
7504 		engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1;
7505 		engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7506 		engines[5] = GAUDI2_ENGINE_ID_PDMA_0;
7507 		engines[6] = GAUDI2_ENGINE_ID_PDMA_1;
7508 		engines[7] = GAUDI2_ENGINE_ID_PCIE;
7509 		engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
7510 		engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
7511 		engines[10] = GAUDI2_ENGINE_ID_PSOC;
7512 		return 11;
7513 
7514 	case DCORE0_RTR1:
7515 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0;
7516 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1;
7517 		return 2;
7518 
7519 	case DCORE0_RTR2:
7520 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2;
7521 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3;
7522 		return 2;
7523 
7524 	case DCORE0_RTR3:
7525 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4;
7526 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5;
7527 		return 2;
7528 
7529 	case DCORE0_RTR4:
7530 	case DCORE0_RTR5:
7531 	case DCORE0_RTR6:
7532 	case DCORE0_RTR7:
7533 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME;
7534 		return 1;
7535 
7536 	case DCORE1_RTR0:
7537 	case DCORE1_RTR1:
7538 	case DCORE1_RTR2:
7539 	case DCORE1_RTR3:
7540 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME;
7541 		return 1;
7542 
7543 	case DCORE1_RTR4:
7544 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4;
7545 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5;
7546 		return 2;
7547 
7548 	case DCORE1_RTR5:
7549 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2;
7550 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3;
7551 		return 2;
7552 
7553 	case DCORE1_RTR6:
7554 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0;
7555 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1;
7556 		return 2;
7557 
7558 	case DCORE1_RTR7:
7559 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0;
7560 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1;
7561 		engines[2] = GAUDI2_ENGINE_ID_NIC0_0;
7562 		engines[3] = GAUDI2_ENGINE_ID_NIC1_0;
7563 		engines[4] = GAUDI2_ENGINE_ID_NIC2_0;
7564 		engines[5] = GAUDI2_ENGINE_ID_NIC3_0;
7565 		engines[6] = GAUDI2_ENGINE_ID_NIC4_0;
7566 		engines[7] = GAUDI2_ENGINE_ID_ARC_FARM;
7567 		engines[8] = GAUDI2_ENGINE_ID_KDMA;
7568 		engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
7569 		engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
7570 		return 11;
7571 
7572 	case DCORE2_RTR0:
7573 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0;
7574 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1;
7575 		engines[2] = GAUDI2_ENGINE_ID_NIC5_0;
7576 		engines[3] = GAUDI2_ENGINE_ID_NIC6_0;
7577 		engines[4] = GAUDI2_ENGINE_ID_NIC7_0;
7578 		engines[5] = GAUDI2_ENGINE_ID_NIC8_0;
7579 		engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
7580 		engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
7581 		engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7582 		return 9;
7583 
7584 	case DCORE2_RTR1:
7585 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4;
7586 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5;
7587 		return 2;
7588 
7589 	case DCORE2_RTR2:
7590 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2;
7591 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3;
7592 		return 2;
7593 
7594 	case DCORE2_RTR3:
7595 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0;
7596 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1;
7597 		return 2;
7598 
7599 	case DCORE2_RTR4:
7600 	case DCORE2_RTR5:
7601 	case DCORE2_RTR6:
7602 	case DCORE2_RTR7:
7603 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME;
7604 		return 1;
7605 	case DCORE3_RTR0:
7606 	case DCORE3_RTR1:
7607 	case DCORE3_RTR2:
7608 	case DCORE3_RTR3:
7609 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME;
7610 		return 1;
7611 	case DCORE3_RTR4:
7612 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0;
7613 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1;
7614 		return 2;
7615 	case DCORE3_RTR5:
7616 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2;
7617 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3;
7618 		return 2;
7619 	case DCORE3_RTR6:
7620 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4;
7621 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5;
7622 		return 2;
7623 	case DCORE3_RTR7:
7624 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0;
7625 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1;
7626 		engines[2] = GAUDI2_ENGINE_ID_NIC9_0;
7627 		engines[3] = GAUDI2_ENGINE_ID_NIC10_0;
7628 		engines[4] = GAUDI2_ENGINE_ID_NIC11_0;
7629 		engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
7630 		engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
7631 		engines[7] = GAUDI2_ENGINE_ID_ROT_1;
7632 		engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7633 		return 9;
7634 	default:
7635 		return 0;
7636 	}
7637 }
7638 
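/*
 * Report an unmapped HBW access captured by the router: read the captured
 * 64-bit address from the DEC_RAZWI HBW registers, pass it to the common
 * RAZWI handling and clear the set indication.
 */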
7639 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7640 							u64 rtr_ctrl_base_addr, bool is_write,
7641 							u64 *event_mask)
7642 {
7643 	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7644 	u32 razwi_hi, razwi_lo;
7645 	u8 rd_wr_flag;
7646 
7647 	num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7648 
7649 	if (is_write) {
7650 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI);
7651 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO);
7652 		rd_wr_flag = HL_RAZWI_WRITE;
7653 
7654 		/* Clear set indication */
7655 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1);
7656 	} else {
7657 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI);
7658 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO);
7659 		rd_wr_flag = HL_RAZWI_READ;
7660 
7661 		/* Clear set indication */
7662 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
7663 	}
7664 
7665 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng,
7666 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7667 	dev_err_ratelimited(hdev->dev,
7668 		"RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
7669 		is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
7670 
7671 	dev_err_ratelimited(hdev->dev,
7672 		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7673 }
7674 
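/* Same as the HBW variant above, but for the 32-bit LBW captured address */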
7675 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7676 							u64 rtr_ctrl_base_addr, bool is_write,
7677 							u64 *event_mask)
7678 {
7679 	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7680 	u32 razwi_addr;
7681 	u8 rd_wr_flag;
7682 
7683 	num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7684 
7685 	if (is_write) {
7686 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
7687 		rd_wr_flag = HL_RAZWI_WRITE;
7688 
7689 		/* Clear set indication */
7690 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
7691 	} else {
7692 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
7693 		rd_wr_flag = HL_RAZWI_READ;
7694 
7695 		/* Clear set indication */
7696 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
7697 	}
7698 
7699 	hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW,
7700 			event_mask);
7701 	dev_err_ratelimited(hdev->dev,
7702 		"RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n",
7703 		is_write ? "WR" : "RD", rtr_id, razwi_addr);
7704 
7705 	dev_err_ratelimited(hdev->dev,
7706 		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7707 }
7708 
7709 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
7710 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
7711 {
7712 	u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy,
7713 						razwi_mask_info, razwi_intr = 0, error_count = 0;
7714 	int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES;
7715 	u64 rtr_ctrl_base_addr;
7716 
7717 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
7718 		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
7719 		if (!razwi_intr)
7720 			return 0;
7721 	}
7722 
7723 	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
7724 	xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info);
7725 
7726 	dev_err_ratelimited(hdev->dev,
7727 		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
7728 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
7729 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
7730 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
7731 		xy,
7732 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
7733 
7734 	if (xy == 0) {
7735 		dev_err_ratelimited(hdev->dev,
7736 				"PSOC RAZWI interrupt: received event from 0 rtr coordinates\n");
7737 		goto clear;
7738 	}
7739 
7740 	/* Find router id by router coordinates */
7741 	for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++)
7742 		if (rtr_coordinates_to_rtr_id[rtr_id] == xy)
7743 			break;
7744 
7745 	if (rtr_id == rtr_map_arr_len) {
7746 		dev_err_ratelimited(hdev->dev,
7747 				"PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy);
7748 		goto clear;
7749 	}
7750 
7751 	/* Find router mstr_if register base */
7752 	dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7753 	dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7754 	rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET +
7755 				dcore_rtr_id * DCORE_RTR_OFFSET;
7756 
7757 	hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET);
7758 	hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET);
7759 	lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET);
7760 	lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET);
7761 
7762 	if (hbw_aw_set)
7763 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7764 						rtr_ctrl_base_addr, true, event_mask);
7765 
7766 	if (hbw_ar_set)
7767 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7768 						rtr_ctrl_base_addr, false, event_mask);
7769 
7770 	if (lbw_aw_set)
7771 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7772 						rtr_ctrl_base_addr, true, event_mask);
7773 
7774 	if (lbw_ar_set)
7775 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7776 						rtr_ctrl_base_addr, false, event_mask);
7777 
7778 	error_count++;
7779 
7780 clear:
7781 	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
7782 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
7783 		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
7784 
7785 	return error_count;
7786 }
7787 
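/*
 * Print every error cause latched in the QMAN SEI status register, clear the
 * handled bits and return the number of reported errors.
 */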
7788 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
7789 {
7790 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
7791 
7792 	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
7793 
7794 	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
7795 		if (sts_val & BIT(i)) {
7796 			gaudi2_print_event(hdev, event_type, true,
7797 				"err cause: %s", gaudi2_qm_sei_error_cause[i]);
7798 			sts_clr_val |= BIT(i);
7799 			error_count++;
7800 		}
7801 	}
7802 
7803 	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
7804 
7805 	return error_count;
7806 }
7807 
7808 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
7809 					struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
7810 {
7811 	enum razwi_event_sources module;
7812 	u32 error_count = 0;
7813 	u64 qman_base;
7814 	u8 index;
7815 
7816 	switch (event_type) {
7817 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
7818 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
7819 		qman_base = mmDCORE0_TPC0_QM_BASE +
7820 				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
7821 				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
7822 		module = RAZWI_TPC;
7823 		break;
7824 	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
7825 		qman_base = mmDCORE0_TPC6_QM_BASE;
7826 		module = RAZWI_TPC;
7827 		break;
7828 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
7829 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
7830 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
7831 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
7832 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
7833 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
7834 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
7835 		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
7836 		module = RAZWI_MME;
7837 		break;
7838 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
7839 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
7840 		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
7841 		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
7842 		module = RAZWI_PDMA;
7843 		break;
7844 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
7845 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
7846 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
7847 		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
7848 		module = RAZWI_ROT;
7849 		break;
7850 	default:
7851 		return 0;
7852 	}
7853 
7854 	error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
7855 
7856 	/* There is a single event per NIC macro, so we should check both of its QMAN blocks */
7857 	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
7858 			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
7859 		error_count += _gaudi2_handle_qm_sei_err(hdev,
7860 					qman_base + NIC_QM_OFFSET, event_type);
7861 
7862 	/* check if RAZWI happened */
7863 	if (razwi_info)
7864 		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info, event_mask);
7865 
7866 	return error_count;
7867 }
7868 
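/*
 * Resolve the QMAN register base and the first queue id of the block that
 * generated the event, then let the generic QMAN error handler decode and
 * report the error causes.
 */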
7869 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type)
7870 {
7871 	u32 qid_base, error_count = 0;
7872 	u64 qman_base;
7873 	u8 index;
7874 
7875 	switch (event_type) {
7876 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
7877 		index = event_type - GAUDI2_EVENT_TPC0_QM;
7878 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
7879 		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7880 		break;
7881 	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
7882 		index = event_type - GAUDI2_EVENT_TPC6_QM;
7883 		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
7884 		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7885 		break;
7886 	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
7887 		index = event_type - GAUDI2_EVENT_TPC12_QM;
7888 		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
7889 		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7890 		break;
7891 	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
7892 		index = event_type - GAUDI2_EVENT_TPC18_QM;
7893 		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
7894 		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7895 		break;
7896 	case GAUDI2_EVENT_TPC24_QM:
7897 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
7898 		qman_base = mmDCORE0_TPC6_QM_BASE;
7899 		break;
7900 	case GAUDI2_EVENT_MME0_QM:
7901 		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
7902 		qman_base = mmDCORE0_MME_QM_BASE;
7903 		break;
7904 	case GAUDI2_EVENT_MME1_QM:
7905 		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
7906 		qman_base = mmDCORE1_MME_QM_BASE;
7907 		break;
7908 	case GAUDI2_EVENT_MME2_QM:
7909 		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
7910 		qman_base = mmDCORE2_MME_QM_BASE;
7911 		break;
7912 	case GAUDI2_EVENT_MME3_QM:
7913 		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
7914 		qman_base = mmDCORE3_MME_QM_BASE;
7915 		break;
7916 	case GAUDI2_EVENT_HDMA0_QM:
7917 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
7918 		qman_base = mmDCORE0_EDMA0_QM_BASE;
7919 		break;
7920 	case GAUDI2_EVENT_HDMA1_QM:
7921 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
7922 		qman_base = mmDCORE0_EDMA1_QM_BASE;
7923 		break;
7924 	case GAUDI2_EVENT_HDMA2_QM:
7925 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
7926 		qman_base = mmDCORE1_EDMA0_QM_BASE;
7927 		break;
7928 	case GAUDI2_EVENT_HDMA3_QM:
7929 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
7930 		qman_base = mmDCORE1_EDMA1_QM_BASE;
7931 		break;
7932 	case GAUDI2_EVENT_HDMA4_QM:
7933 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
7934 		qman_base = mmDCORE2_EDMA0_QM_BASE;
7935 		break;
7936 	case GAUDI2_EVENT_HDMA5_QM:
7937 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
7938 		qman_base = mmDCORE2_EDMA1_QM_BASE;
7939 		break;
7940 	case GAUDI2_EVENT_HDMA6_QM:
7941 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
7942 		qman_base = mmDCORE3_EDMA0_QM_BASE;
7943 		break;
7944 	case GAUDI2_EVENT_HDMA7_QM:
7945 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
7946 		qman_base = mmDCORE3_EDMA1_QM_BASE;
7947 		break;
7948 	case GAUDI2_EVENT_PDMA0_QM:
7949 		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
7950 		qman_base = mmPDMA0_QM_BASE;
7951 		break;
7952 	case GAUDI2_EVENT_PDMA1_QM:
7953 		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
7954 		qman_base = mmPDMA1_QM_BASE;
7955 		break;
7956 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
7957 		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
7958 		qman_base = mmROT0_QM_BASE;
7959 		break;
7960 	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
7961 		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
7962 		qman_base = mmROT1_QM_BASE;
7963 		break;
7964 	default:
7965 		return 0;
7966 	}
7967 
7968 	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
7969 
7970 	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
7971 	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM)
7972 		error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
7973 
7974 	return error_count;
7975 }
7976 
7977 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
7978 {
7979 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
7980 
7981 	sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS);
7982 
7983 	for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
7984 		if (sts_val & BIT(i)) {
7985 			gaudi2_print_event(hdev, event_type, true,
7986 				"err cause: %s", gaudi2_arc_sei_error_cause[i]);
7987 			sts_clr_val |= BIT(i);
7988 			error_count++;
7989 		}
7990 	}
7991 
7992 	WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val);
7993 
7994 	return error_count;
7995 }
7996 
7997 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
7998 {
7999 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8000 
8001 	sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8002 
8003 	for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8004 		if (sts_val & BIT(i)) {
8005 			gaudi2_print_event(hdev, event_type, true,
8006 				"err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8007 			sts_clr_val |= BIT(i);
8008 			error_count++;
8009 		}
8010 	}
8011 
8012 	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8013 
8014 	return error_count;
8015 }
8016 
8017 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8018 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8019 					u64 *event_mask)
8020 {
8021 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8022 	u32 error_count = 0;
8023 	int i;
8024 
8025 	for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8026 		if (intr_cause_data & BIT(i)) {
8027 			gaudi2_print_event(hdev, event_type, true,
8028 				"err cause: %s", guadi2_rot_error_cause[i]);
8029 			error_count++;
8030 		}
8031 
8032 	/* check if RAZWI happened */
8033 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0,
8034 						&razwi_with_intr_cause->razwi_info, event_mask);
8035 
8036 	return error_count;
8037 }
8038 
8039 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type,
8040 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8041 					u64 *event_mask)
8042 {
8043 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8044 	u32 error_count = 0;
8045 	int i;
8046 
8047 	for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8048 		if (intr_cause_data & BIT(i)) {
8049 			gaudi2_print_event(hdev, event_type, true,
8050 				"interrupt cause: %s",  gaudi2_tpc_interrupts_cause[i]);
8051 			error_count++;
8052 		}
8053 
8054 	/* check if RAZWI happened */
8055 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0,
8056 						&razwi_with_intr_cause->razwi_info, event_mask);
8057 
8058 	return error_count;
8059 }
8060 
8061 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8062 				struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8063 {
8064 	u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8065 	int i;
8066 
8067 	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8068 		/* DCORE DEC */
8069 		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8070 				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8071 				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8072 	else
8073 		/* PCIE DEC */
8074 		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8075 				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8076 
8077 	sts_val = RREG32(sts_addr);
8078 
8079 	for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8080 		if (sts_val & BIT(i)) {
8081 			gaudi2_print_event(hdev, event_type, true,
8082 				"err cause: %s", gaudi2_dec_error_cause[i]);
8083 			sts_clr_val |= BIT(i);
8084 			error_count++;
8085 		}
8086 	}
8087 
8088 	/* check if RAZWI happened */
8089 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info,
8090 						event_mask);
8091 
8092 	/* Write 1 to clear the errors */
8093 	WREG32(sts_addr, sts_clr_val);
8094 
8095 	return error_count;
8096 }
8097 
8098 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8099 				struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8100 {
8101 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8102 	int i;
8103 
8104 	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8105 	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8106 
8107 	sts_val = RREG32(sts_addr);
8108 
8109 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8110 		if (sts_val & BIT(i)) {
8111 			gaudi2_print_event(hdev, event_type, true,
8112 				"err cause: %s", guadi2_mme_error_cause[i]);
8113 			sts_clr_val |= BIT(i);
8114 			error_count++;
8115 		}
8116 	}
8117 
8118 	/* check if RAZWI happened */
8119 	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8120 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info,
8121 							event_mask);
8122 
8123 	WREG32(sts_clr_addr, sts_clr_val);
8124 
8125 	return error_count;
8126 }
8127 
8128 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
8129 					u64 intr_cause_data)
8130 {
8131 	int i, error_count = 0;
8132 
8133 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8134 		if (intr_cause_data & BIT(i)) {
8135 			gaudi2_print_event(hdev, event_type, true,
8136 				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
8137 			error_count++;
8138 		}
8139 
8140 	return error_count;
8141 }
8142 
8143 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8144 					struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8145 {
8146 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8147 	int i;
8148 
8149 	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8150 	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8151 
8152 	sts_val = RREG32(sts_addr);
8153 
8154 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8155 		if (sts_val & BIT(i)) {
8156 			gaudi2_print_event(hdev, event_type, true,
8157 				"err cause: %s", guadi2_mme_wap_error_cause[i]);
8158 			sts_clr_val |= BIT(i);
8159 			error_count++;
8160 		}
8161 	}
8162 
8163 	/* check if RAZWI happened on WAP0/1 */
8164 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info,
8165 						event_mask);
8166 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info,
8167 						event_mask);
8168 
8169 	WREG32(sts_clr_addr, sts_clr_val);
8170 
8171 	return error_count;
8172 }
8173 
8174 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8175 					u64 intr_cause_data)
8176 {
8177 	u32 error_count = 0;
8178 	int i;
8179 
8180 	/* If an AXI read or write error is received, an error is reported and an
8181 	 * interrupt message is sent. Due to a HW erratum, when reading the cause
8182 	 * register of the KDMA engine, the reported error is always HBW even if
8183 	 * the actual error was caused by an LBW KDMA transaction.
8184 	 */
8185 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8186 		if (intr_cause_data & BIT(i)) {
8187 			gaudi2_print_event(hdev, event_type, true,
8188 				"err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8189 			error_count++;
8190 		}
8191 
8192 	return error_count;
8193 }
8194 
8195 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type,
8196 					u64 intr_cause_data)
8197 {
8198 	u32 error_count = 0;
8199 	int i;
8200 
8201 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8202 		if (intr_cause_data & BIT(i)) {
8203 			gaudi2_print_event(hdev, event_type, true,
8204 				"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8205 			error_count++;
8206 		}
8207 
8208 	return error_count;
8209 }
8210 
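/*
 * Check the PCIE master RR shared MSTR_IF for HBW/LBW read and write RAZWI
 * indications, report any that are set and clear them.
 */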
8211 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8212 {
8213 	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8214 
8215 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8216 	if (RREG32(razwi_happened_addr)) {
8217 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
8218 							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8219 		WREG32(razwi_happened_addr, 0x1);
8220 	}
8221 
8222 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8223 	if (RREG32(razwi_happened_addr)) {
8224 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
8225 							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8226 		WREG32(razwi_happened_addr, 0x1);
8227 	}
8228 
8229 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8230 	if (RREG32(razwi_happened_addr)) {
8231 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
8232 							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8233 		WREG32(razwi_happened_addr, 0x1);
8234 	}
8235 
8236 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8237 	if (RREG32(razwi_happened_addr)) {
8238 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
8239 							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8240 		WREG32(razwi_happened_addr, 0x1);
8241 	}
8242 }
8243 
8244 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8245 					u64 intr_cause_data, u64 *event_mask)
8246 {
8247 	u32 error_count = 0;
8248 	int i;
8249 
8250 	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8251 		if (!(intr_cause_data & BIT_ULL(i)))
8252 			continue;
8253 
8254 		gaudi2_print_event(hdev, event_type, true,
8255 			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8256 		error_count++;
8257 
8258 		switch (intr_cause_data & BIT_ULL(i)) {
8259 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8260 			break;
8261 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8262 			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8263 			break;
8264 		}
8265 	}
8266 
8267 	return error_count;
8268 }
8269 
8270 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8271 				u64 intr_cause_data)
8273 {
8274 	u32 error_count = 0;
8275 	int i;
8276 
8277 	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8278 		if (intr_cause_data & BIT_ULL(i)) {
8279 			gaudi2_print_event(hdev, event_type, true,
8280 				"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8281 			error_count++;
8282 		}
8283 	}
8284 
8285 	return error_count;
8286 }
8287 
8288 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8289 {
8290 	u32 error_count = 0;
8291 	int i;
8292 
8293 	for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8294 		if (intr_cause_data & BIT_ULL(i)) {
8295 			gaudi2_print_event(hdev, event_type, true,
8296 				"err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8297 			error_count++;
8298 		}
8299 	}
8300 
8301 	return error_count;
8302 }
8303 
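/*
 * If a valid page-fault entry was latched by the MMU, reconstruct the faulting
 * VA from the capture registers, report it and clear the capture register.
 */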
8304 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8305 					u64 *event_mask)
8306 {
8307 	u32 valid, val;
8308 	u64 addr;
8309 
8310 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8311 
8312 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8313 		return;
8314 
8315 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8316 	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8317 	addr <<= 32;
8318 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8319 
8320 	dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
8321 				is_pmmu ? "PMMU" : "HMMU", addr);
8322 	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8323 
8324 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0);
8325 }
8326 
8327 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8328 {
8329 	u32 valid, val;
8330 	u64 addr;
8331 
8332 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8333 
8334 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8335 		return;
8336 
8337 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8338 	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8339 	addr <<= 32;
8340 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8341 
8342 	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8343 				is_pmmu ? "PMMU" : "HMMU", addr);
8344 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0);
8345 }
8346 
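/*
 * Decode the MMU SPI/SEI cause register: cause bit 0 is a page fault and
 * bit 1 an access error, each with dedicated handling. Every reported cause
 * is cleared, along with its interrupt bit when one is defined.
 */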
8347 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
8348 						u64 mmu_base, bool is_pmmu, u64 *event_mask)
8349 {
8350 	u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
8351 	int i;
8352 
8353 	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8354 
8355 	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8356 		if (spi_sei_cause & BIT(i)) {
8357 			gaudi2_print_event(hdev, event_type, true,
8358 				"err cause: %s", gaudi2_mmu_spi_sei[i].cause);
8359 
8360 			if (i == 0)
8361 				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
8362 			else if (i == 1)
8363 				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8364 
8365 			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8366 				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8367 
8368 			error_count++;
8369 		}
8370 	}
8371 
8372 	/* Clear cause */
8373 	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8374 
8375 	/* Clear interrupt */
8376 	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8377 
8378 	return error_count;
8379 }
8380 
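/*
 * Handle sync manager errors for the given SM index: decode and clear the
 * SM_SEI_CAUSE register, and report and clear a pending CQ interrupt.
 */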
8381 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
8382 {
8383 	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
8384 		cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
8385 	int i;
8386 
8387 	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
8388 	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
8389 
8390 	sei_cause_val = RREG32(sei_cause_addr);
8391 	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
8392 	cq_intr_val = RREG32(cq_intr_addr);
8393 
8394 	/* SEI interrupt */
8395 	if (sei_cause_cause) {
8396 		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
8397 		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
8398 					sei_cause_val);
8399 
8400 		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
8401 			if (!(sei_cause_cause & BIT(i)))
8402 				continue;
8403 
8404 			gaudi2_print_event(hdev, event_type, true,
8405 				"err cause: %s. %s: 0x%X\n",
8406 				gaudi2_sm_sei_cause[i].cause_name,
8407 				gaudi2_sm_sei_cause[i].log_name,
8408 				sei_cause_log & gaudi2_sm_sei_cause[i].log_mask);
8409 			error_count++;
8410 			break;
8411 		}
8412 
8413 		/* Clear SM_SEI_CAUSE */
8414 		WREG32(sei_cause_addr, 0);
8415 	}
8416 
8417 	/* CQ interrupt */
8418 	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
8419 		cq_intr_queue_index =
8420 				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
8421 					cq_intr_val);
8422 
8423 		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
8424 				sm_index, cq_intr_queue_index);
8425 		error_count++;
8426 
8427 		/* Clear CQ_INTR */
8428 		WREG32(cq_intr_addr, 0);
8429 	}
8430 
8431 	return error_count;
8432 }
8433 
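/*
 * Map the MMU event type to its HMMU/PMMU register base and delegate to the
 * generic SPI/SEI handler. The division by 3 accounts for the three
 * consecutive event types of each HMMU instance.
 */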
8434 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8435 {
8436 	bool is_pmmu = false;
8437 	u32 error_count = 0;
8438 	u64 mmu_base;
8439 	u8 index;
8440 
8441 	switch (event_type) {
8442 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
8443 		index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3;
8444 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8445 		break;
8446 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
8447 		index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP);
8448 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8449 		break;
8450 	case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
8451 		index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3;
8452 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8453 		break;
8454 	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
8455 		index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP);
8456 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8457 		break;
8458 	case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
8459 		index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3;
8460 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8461 		break;
8462 	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
8463 		index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP);
8464 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8465 		break;
8466 	case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8467 		index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3;
8468 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8469 		break;
8470 	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8471 		index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP);
8472 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8473 		break;
8474 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8475 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8476 		is_pmmu = true;
8477 		mmu_base = mmPMMU_HBW_MMU_BASE;
8478 		break;
8479 	default:
8480 		return 0;
8481 	}
8482 
8483 	error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
8484 							is_pmmu, event_mask);
8485 
8486 	return error_count;
8487 }
8488 
8490 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
8491 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
8492 			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
8493 {
8494 	u32 addr, beat, beat_shift;
8495 	bool rc = false;
8496 
8497 	dev_err_ratelimited(hdev->dev,
8498 			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
8499 			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
8500 			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
8501 			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
8502 
8503 	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
8504 	dev_err_ratelimited(hdev->dev,
8505 			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
8506 			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
8507 			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
8508 			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
8509 			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
8510 			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
8511 
8512 	/* For each beat (RDQS edge), look for possible errors and print relevant info */
8513 	for (beat = 0 ; beat < 4 ; beat++) {
8514 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8515 			(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
8516 			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
8517 						beat,
8518 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8519 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8520 
8521 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8522 			(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
8523 			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
8524 						beat,
8525 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8526 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8527 			rc |= true;
8528 		}
8529 
8530 		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
8531 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8532 			(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
8533 			dev_err_ratelimited(hdev->dev,
8534 					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
8535 					beat,
8536 					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8537 					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8538 						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
8539 						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
8540 			rc |= true;
8541 		}
8542 
8543 		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
8544 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8545 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
8546 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8547 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
8548 	}
8549 
8550 	return rc;
8551 }
8552 
8553 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
8554 			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
8555 {
8556 	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
8557 	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
8558 
8559 	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
8560 
8561 	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
8562 				derr & 0x3, derr & 0xc);
8563 
8564 	/* JIRA H6-3286 - the following prints may not be valid */
8565 	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
8566 	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
8567 		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
8568 		dev_err_ratelimited(hdev->dev,
8569 				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
8570 				i,
8571 				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
8572 				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
8573 				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
8574 				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
8575 	}
8576 }
8577 
8578 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
8579 		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
8580 {
8581 	__le32 *col_cmd = ca_par_err_data->dbg_col;
8582 	__le16 *row_cmd = ca_par_err_data->dbg_row;
8583 	u32 i;
8584 
8585 	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
8586 
8587 	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
8588 	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
8589 		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
8590 			le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
8591 			le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
8592 }
8593 
8594 /* Returns true if hard reset is needed or false otherwise */
8595 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
8596 					struct hl_eq_hbm_sei_data *sei_data)
8597 {
8598 	bool require_hard_reset = false;
8599 	u32 hbm_id, mc_id, cause_idx;
8600 
8601 	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
8602 	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
8603 
8604 	cause_idx = sei_data->hdr.sei_cause;
8605 	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
8606 		gaudi2_print_event(hdev, event_type, true,
8607 			"err cause: %s",
8608 			"Invalid HBM SEI event cause (%d) provided by FW\n", cause_idx);
8609 		return true;
8610 	}
8611 
8612 	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
8613 		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8614 		sei_data->hdr.is_critical ? "Critical" : "Non-critical",
8615 		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8616 		hbm_mc_sei_cause[cause_idx]);
8617 
8618 	/* Print error-specific info */
8619 	switch (cause_idx) {
8620 	case HBM_SEI_CATTRIP:
8621 		require_hard_reset = true;
8622 		break;
8623 
8624 	case HBM_SEI_CMD_PARITY_EVEN:
8625 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
8626 						le32_to_cpu(sei_data->hdr.cnt));
8627 		require_hard_reset = true;
8628 		break;
8629 
8630 	case HBM_SEI_CMD_PARITY_ODD:
8631 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
8632 						le32_to_cpu(sei_data->hdr.cnt));
8633 		require_hard_reset = true;
8634 		break;
8635 
8636 	case HBM_SEI_WRITE_DATA_PARITY_ERR:
8637 		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
8638 						le32_to_cpu(sei_data->hdr.cnt));
8639 		require_hard_reset = true;
8640 		break;
8641 
8642 	case HBM_SEI_READ_ERR:
8643 		/* Unlike other SEI events, read error requires further processing of the
8644 		 * raw data in order to determine the root cause.
8645 		 */
8646 		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
8647 								&sei_data->read_err_info,
8648 								le32_to_cpu(sei_data->hdr.cnt));
8649 		break;
8650 
8651 	default:
8652 		break;
8653 	}
8654 
8655 	require_hard_reset |= !!sei_data->hdr.is_critical;
8656 
8657 	return require_hard_reset;
8658 }
8659 
8660 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
8661 				u64 intr_cause_data)
8662 {
8663 	if (intr_cause_data) {
8664 		gaudi2_print_event(hdev, event_type, true,
8665 			"temperature error cause: %#llx", intr_cause_data);
8666 		return 1;
8667 	}
8668 
8669 	return 0;
8670 }
8671 
8672 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
8673 {
8674 	u32 i, error_count = 0;
8675 
8676 	for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
8677 		if (intr_cause_data & hbm_mc_spi[i].mask) {
8678 			dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
8679 				hbm_mc_spi[i].cause);
8680 			error_count++;
8681 		}
8682 
8683 	return error_count;
8684 }
8685 
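/*
 * Track clock throttling state: update the current/aggregated reason masks
 * and the start/end timestamps under the clk_throttling lock, and flag a user
 * notification for thermal events.
 */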
8686 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8687 {
8688 	ktime_t zero_time = ktime_set(0, 0);
8689 
8690 	mutex_lock(&hdev->clk_throttling.lock);
8691 
8692 	switch (event_type) {
8693 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
8694 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
8695 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
8696 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
8697 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
8698 		dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
8699 		break;
8700 
8701 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
8702 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
8703 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
8704 		dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n");
8705 		break;
8706 
8707 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
8708 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
8709 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
8710 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
8711 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
8712 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8713 		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
8714 		break;
8715 
8716 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
8717 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
8718 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
8719 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8720 		dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
8721 		break;
8722 
8723 	default:
8724 		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
8725 		break;
8726 	}
8727 
8728 	mutex_unlock(&hdev->clk_throttling.lock);
8729 }
8730 
8731 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
8732 					struct cpucp_pkt_sync_err *sync_err)
8733 {
8734 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8735 
8736 	gaudi2_print_event(hdev, event_type, false,
8737 		"FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
8738 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
8739 		q->pi, atomic_read(&q->ci));
8740 }
8741 
8742 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
8743 {
8744 	u32 p2p_intr, msix_gw_intr, error_count = 0;
8745 
8746 	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
8747 	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
8748 
8749 	if (p2p_intr) {
8750 		gaudi2_print_event(hdev, event_type, true,
8751 			"pcie p2p transaction terminated due to security, req_id(0x%x)\n",
8752 			RREG32(mmPCIE_WRAP_P2P_REQ_ID));
8753 
8754 		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
8755 		error_count++;
8756 	}
8757 
8758 	if (msix_gw_intr) {
8759 		gaudi2_print_event(hdev, event_type, true,
8760 			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n",
8761 			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
8762 
8763 		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
8764 		error_count++;
8765 	}
8766 
8767 	return error_count;
8768 }
8769 
8770 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
8771 			struct hl_eq_pcie_drain_ind_data *drain_data)
8772 {
8773 	u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
8774 
8775 	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
8776 	lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
8777 	lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
8778 	hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
8779 	hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
8780 
8781 	if (cause & BIT_ULL(0)) {
8782 		dev_err_ratelimited(hdev->dev,
8783 			"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
8784 			!!lbw_rd, !!lbw_wr);
8785 		error_count++;
8786 	}
8787 
8788 	if (cause & BIT_ULL(1)) {
8789 		dev_err_ratelimited(hdev->dev,
8790 			"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
8791 			hbw_rd, hbw_wr);
8792 		error_count++;
8793 	}
8794 
8795 	return error_count;
8796 }
8797 
8798 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
8799 {
8800 	u32 error_count = 0;
8801 	int i;
8802 
8803 	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
8804 		if (intr_cause_data & BIT_ULL(i)) {
8805 			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
8806 				gaudi2_psoc_axi_drain_interrupts_cause[i]);
8807 			error_count++;
8808 		}
8809 	}
8810 
8811 	return error_count;
8812 }
8813 
8814 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
8815 					struct cpucp_pkt_sync_err *sync_err)
8816 {
8817 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8818 
8819 	gaudi2_print_event(hdev, event_type, false,
8820 		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
8821 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
8822 }
8823 
8824 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
8825 					struct hl_eq_engine_arc_intr_data *data)
8826 {
8827 	struct hl_engine_arc_dccm_queue_full_irq *q;
8828 	u32 intr_type, engine_id;
8829 	u64 payload;
8830 
8831 	intr_type = le32_to_cpu(data->intr_type);
8832 	engine_id = le32_to_cpu(data->engine_id);
8833 	payload = le64_to_cpu(data->payload);
8834 
8835 	switch (intr_type) {
8836 	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
8837 		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
8838 
8839 		gaudi2_print_event(hdev, event_type, true,
8840 				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n",
8841 				engine_id, intr_type, q->queue_index);
8842 		return 1;
8843 	default:
8844 		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type\n");
8845 		return 0;
8846 	}
8847 }
8848 
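/*
 * Main event-queue handler: extract the event type from the EQ entry header,
 * update the event statistics and dispatch to the matching handler, while
 * collecting the error count, reset requirements and user notification mask.
 */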
8849 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
8850 {
8851 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
8852 	bool reset_required = false, is_critical = false;
8853 	u32 index, ctl, reset_flags = HL_DRV_RESET_HARD, error_count = 0;
8854 	u64 event_mask = 0;
8855 	u16 event_type;
8856 
8857 	ctl = le32_to_cpu(eq_entry->hdr.ctl);
8858 	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
8859 
8860 	if (event_type >= GAUDI2_EVENT_SIZE) {
8861 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
8862 				event_type, GAUDI2_EVENT_SIZE - 1);
8863 		return;
8864 	}
8865 
8866 	gaudi2->events_stat[event_type]++;
8867 	gaudi2->events_stat_aggregate[event_type]++;
8868 
8869 	switch (event_type) {
8870 	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
8871 		fallthrough;
8872 	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
8873 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8874 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8875 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8876 		is_critical = eq_entry->ecc_data.is_critical;
8877 		error_count++;
8878 		break;
8879 
8880 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
8881 		fallthrough;
8882 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8883 		fallthrough;
8884 	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
8885 		error_count = gaudi2_handle_qman_err(hdev, event_type);
8886 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8887 		break;
8888 
8889 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
8890 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8891 		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
8892 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8893 		break;
8894 
8895 	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
8896 		error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
8897 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8898 		break;
8899 
8900 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8901 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8902 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8903 		error_count = gaudi2_handle_qm_sei_err(hdev, event_type,
8904 					&eq_entry->razwi_info, &event_mask);
8905 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8906 		break;
8907 
8908 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8909 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8910 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8911 		error_count = gaudi2_handle_rot_err(hdev, index, event_type,
8912 					&eq_entry->razwi_with_intr_cause, &event_mask);
8913 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
8914 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8915 		break;
8916 
8917 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8918 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8919 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
8920 						&eq_entry->razwi_with_intr_cause, &event_mask);
8921 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
8922 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8923 		break;
8924 
8925 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
8926 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
8927 		error_count = gaudi2_handle_dec_err(hdev, index, event_type,
8928 						&eq_entry->razwi_info, &event_mask);
8929 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8930 		break;
8931 
8932 	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
8933 	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
8934 	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
8935 	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
8936 	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
8937 	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
8938 	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
8939 	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
8940 	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
8941 	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
8942 	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
8943 	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
8944 	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
8945 	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
8946 	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
8947 	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
8948 	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
8949 	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
8950 	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
8951 	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
8952 	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
8953 	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
8954 	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
8955 	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
8956 	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
8957 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
8958 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
8959 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
8960 					&eq_entry->razwi_with_intr_cause, &event_mask);
8961 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8962 		break;
8963 
8964 	case GAUDI2_EVENT_DEC0_SPI:
8965 	case GAUDI2_EVENT_DEC1_SPI:
8966 	case GAUDI2_EVENT_DEC2_SPI:
8967 	case GAUDI2_EVENT_DEC3_SPI:
8968 	case GAUDI2_EVENT_DEC4_SPI:
8969 	case GAUDI2_EVENT_DEC5_SPI:
8970 	case GAUDI2_EVENT_DEC6_SPI:
8971 	case GAUDI2_EVENT_DEC7_SPI:
8972 	case GAUDI2_EVENT_DEC8_SPI:
8973 	case GAUDI2_EVENT_DEC9_SPI:
8974 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
8975 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
8976 		error_count = gaudi2_handle_dec_err(hdev, index, event_type,
8977 					&eq_entry->razwi_info, &event_mask);
8978 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8979 		break;
8980 
8981 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8982 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8983 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8984 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8985 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8986 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8987 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8988 		error_count = gaudi2_handle_mme_err(hdev, index, event_type,
8989 				&eq_entry->razwi_info, &event_mask);
8990 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
8991 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8992 		break;
8993 
8994 	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
8995 	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
8996 	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
8997 	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
8998 		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
8999 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9000 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9001 		error_count = gaudi2_handle_mme_err(hdev, index, event_type,
9002 					&eq_entry->razwi_info, &event_mask);
9003 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9004 		break;
9005 
9006 	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9007 	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9008 	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9009 	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9010 		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9011 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9012 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9013 		error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type,
9014 					&eq_entry->razwi_info, &event_mask);
9015 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9016 		break;
9017 
9018 	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9019 	case GAUDI2_EVENT_KDMA0_CORE:
9020 		error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9021 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9022 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9023 		break;
9024 
9025 	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9026 		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9027 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9028 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9029 		break;
9030 
9031 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9032 		error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9033 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9034 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9035 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9036 		break;
9037 
9038 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9039 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9040 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9041 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9042 		error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9043 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9044 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9045 		break;
9046 
9047 	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9048 		error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9049 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9050 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9051 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9052 		break;
9053 
9054 	case GAUDI2_EVENT_PMMU_FATAL_0:
9055 		error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9056 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9057 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9058 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9059 		break;
9060 
9061 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9062 		error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9063 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9064 		break;
9065 
9066 	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9067 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9068 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9069 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9070 			reset_required = true;
9071 		}
9072 		error_count++;
9073 		break;
9074 
9075 	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9076 		error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9077 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9078 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9079 		break;
9080 
9081 	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9082 		error_count = gaudi2_handle_hbm_mc_spi(hdev,
9083 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9084 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9085 		break;
9086 
9087 	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
9088 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
9089 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9090 		break;
9091 
9092 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
9093 		error_count = gaudi2_handle_psoc_drain(hdev,
9094 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9095 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9096 		break;
9097 
9098 	case GAUDI2_EVENT_CPU_AXI_ECC:
9099 		error_count = GAUDI2_NA_EVENT_CAUSE;
9100 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9101 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9102 		break;
9103 	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
9104 		error_count = GAUDI2_NA_EVENT_CAUSE;
9105 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9106 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9107 		break;
9108 	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9109 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9110 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9111 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
9112 		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
9113 						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9114 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9115 		break;
9116 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9117 		error_count = GAUDI2_NA_EVENT_CAUSE;
9118 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9119 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9120 		break;
9121 	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9122 		error_count = GAUDI2_NA_EVENT_CAUSE;
9123 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9124 		break;
9125 	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9126 		error_count = GAUDI2_NA_EVENT_CAUSE;
9127 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9128 		break;
9129 	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9130 		error_count = GAUDI2_NA_EVENT_CAUSE;
9131 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9132 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9133 		break;
9134 	case GAUDI2_EVENT_PCIE_FATAL_ERR:
9135 		error_count = GAUDI2_NA_EVENT_CAUSE;
9136 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9137 		break;
9138 	case GAUDI2_EVENT_TPC0_BMON_SPMU:
9139 	case GAUDI2_EVENT_TPC1_BMON_SPMU:
9140 	case GAUDI2_EVENT_TPC2_BMON_SPMU:
9141 	case GAUDI2_EVENT_TPC3_BMON_SPMU:
9142 	case GAUDI2_EVENT_TPC4_BMON_SPMU:
9143 	case GAUDI2_EVENT_TPC5_BMON_SPMU:
9144 	case GAUDI2_EVENT_TPC6_BMON_SPMU:
9145 	case GAUDI2_EVENT_TPC7_BMON_SPMU:
9146 	case GAUDI2_EVENT_TPC8_BMON_SPMU:
9147 	case GAUDI2_EVENT_TPC9_BMON_SPMU:
9148 	case GAUDI2_EVENT_TPC10_BMON_SPMU:
9149 	case GAUDI2_EVENT_TPC11_BMON_SPMU:
9150 	case GAUDI2_EVENT_TPC12_BMON_SPMU:
9151 	case GAUDI2_EVENT_TPC13_BMON_SPMU:
9152 	case GAUDI2_EVENT_TPC14_BMON_SPMU:
9153 	case GAUDI2_EVENT_TPC15_BMON_SPMU:
9154 	case GAUDI2_EVENT_TPC16_BMON_SPMU:
9155 	case GAUDI2_EVENT_TPC17_BMON_SPMU:
9156 	case GAUDI2_EVENT_TPC18_BMON_SPMU:
9157 	case GAUDI2_EVENT_TPC19_BMON_SPMU:
9158 	case GAUDI2_EVENT_TPC20_BMON_SPMU:
9159 	case GAUDI2_EVENT_TPC21_BMON_SPMU:
9160 	case GAUDI2_EVENT_TPC22_BMON_SPMU:
9161 	case GAUDI2_EVENT_TPC23_BMON_SPMU:
9162 	case GAUDI2_EVENT_TPC24_BMON_SPMU:
9163 	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9164 	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9165 	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9166 	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9167 	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9168 	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9169 	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9170 	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9171 	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9172 	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9173 	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9174 	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9175 	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9176 		fallthrough;
9177 	case GAUDI2_EVENT_DEC0_BMON_SPMU:
9178 	case GAUDI2_EVENT_DEC1_BMON_SPMU:
9179 	case GAUDI2_EVENT_DEC2_BMON_SPMU:
9180 	case GAUDI2_EVENT_DEC3_BMON_SPMU:
9181 	case GAUDI2_EVENT_DEC4_BMON_SPMU:
9182 	case GAUDI2_EVENT_DEC5_BMON_SPMU:
9183 	case GAUDI2_EVENT_DEC6_BMON_SPMU:
9184 	case GAUDI2_EVENT_DEC7_BMON_SPMU:
9185 	case GAUDI2_EVENT_DEC8_BMON_SPMU:
9186 	case GAUDI2_EVENT_DEC9_BMON_SPMU:
9187 	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9188 		error_count = GAUDI2_NA_EVENT_CAUSE;
9189 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9190 		break;
9191 
9192 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9193 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9194 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9195 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9196 		gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
9197 		error_count = GAUDI2_NA_EVENT_CAUSE;
9198 		break;
9199 
9200 	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9201 		gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
9202 		error_count = GAUDI2_NA_EVENT_CAUSE;
9203 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9204 		break;
9205 
9206 	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9207 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9208 		error_count = GAUDI2_NA_EVENT_CAUSE;
9209 		/* Do nothing - FW will handle it */
9210 		break;
9211 
9212 	case GAUDI2_EVENT_PCIE_P2P_MSIX:
9213 		error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
9214 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9215 		break;
9216 
9217 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9218 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9219 		error_count = gaudi2_handle_sm_err(hdev, event_type, index);
9220 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9221 		break;
9222 
9223 	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9224 		error_count = GAUDI2_NA_EVENT_CAUSE;
9225 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9226 		break;
9227 
9228 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9229 		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9230 						le64_to_cpu(eq_entry->data[0]));
9231 		error_count = GAUDI2_NA_EVENT_CAUSE;
9232 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9233 		break;
9234 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9235 		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9236 						le64_to_cpu(eq_entry->data[0]));
9237 		error_count = GAUDI2_NA_EVENT_CAUSE;
9238 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9239 		break;
9240 
9241 	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9242 		gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
9243 		error_count = GAUDI2_NA_EVENT_CAUSE;
9244 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9245 		break;
9246 
9247 	case GAUDI2_EVENT_ARC_DCCM_FULL:
9248 		error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
9249 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9250 		break;
9251 
9252 	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9253 	case GAUDI2_EVENT_DEV_RESET_REQ:
9254 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9255 		error_count = GAUDI2_NA_EVENT_CAUSE;
9256 		is_critical = true;
9257 		break;
9258 
9259 	default:
9260 		if (gaudi2_irq_map_table[event_type].valid) {
9261 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
9262 						event_type);
9263 			error_count = GAUDI2_NA_EVENT_CAUSE;
9264 		}
9265 	}
9266 
9267 	/* Make sure an error is printed in case no error cause was reported so far.
9268 	 * Note that although the errors were counted, the count is used here only
9269 	 * as a boolean.
9270 	 */
9271 	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
9272 		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
9273 	else if (error_count == 0)
9274 		gaudi2_print_event(hdev, event_type, true,
9275 				"No error cause for H/W event %u\n", event_type);
9276 
9277 	if ((gaudi2_irq_map_table[event_type].reset || reset_required) &&
9278 				(hdev->hard_reset_on_fw_events ||
9279 				(hdev->asic_prop.fw_security_enabled && is_critical)))
9280 		goto reset_device;
9281 
9282 	/* Send unmask irq only for interrupts not classified as MSG */
9283 	if (!gaudi2_irq_map_table[event_type].msg)
9284 		hl_fw_unmask_irq(hdev, event_type);
9285 
9286 	if (event_mask)
9287 		hl_notifier_event_send_all(hdev, event_mask);
9288 
9289 	return;
9290 
9291 reset_device:
9292 	if (hdev->asic_prop.fw_security_enabled && is_critical) {
9293 		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
9294 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
9295 	} else {
9296 		reset_flags |= HL_DRV_RESET_DELAY;
9297 	}
9298 	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
9299 	hl_device_cond_reset(hdev, reset_flags, event_mask);
9300 }
9301 
9302 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
9303 			struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
9304 			u32 hw_queue_id, u32 size, u64 addr, u32 val)
9305 {
9306 	u32 ctl, pkt_size;
9307 	int rc = 0;
9308 
9309 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
9310 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
9311 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
9312 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
9313 
9314 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
9315 	lin_dma_pkt->src_addr = cpu_to_le64(val);
9316 	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
9317 	lin_dma_pkt->tsize = cpu_to_le32(size);
9318 
9319 	pkt_size = sizeof(struct packet_lin_dma);
9320 
9321 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
9322 	if (rc)
9323 		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
9324 				hw_queue_id);
9325 
9326 	return rc;
9327 }
9328 
9329 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
9330 {
9331 	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
9332 					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
9333 					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
9334 					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
9335 	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
9336 		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
9337 	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
9338 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9339 	void *lin_dma_pkts_arr;
9340 	dma_addr_t pkt_dma_addr;
9341 	int rc = 0, dma_num = 0;
9342 
9343 	if (prop->edma_enabled_mask == 0) {
9344 		dev_info(hdev->dev, "none of the EDMA engines is enabled - skipping DRAM scrubbing\n");
9345 		return -EIO;
9346 	}
9347 
9348 	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9349 	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
9350 	comp_addr = CFG_BASE + sob_addr;
9351 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
9352 		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
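	/*
	 * comp_val makes each EDMA write-completion increment the SOB by 1, so the
	 * SOB ends up holding the number of completed LIN_DMA packets (it is polled
	 * against dma_num further below).
	 */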
9353 	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
9354 		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
9355 
9356 	/* Calculate how many lin dma pkts we'll need */
9357 	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
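	/* one LIN_DMA packet per 2GB chunk, e.g. scrubbing 6GB takes 3 packets */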
9358 	pkt_size = sizeof(struct packet_lin_dma);
9359 
9360 	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
9361 					&pkt_dma_addr, GFP_KERNEL);
9362 	if (!lin_dma_pkts_arr)
9363 		return -ENOMEM;
9364 
9365 	/*
9366 	 * Set MMU bypass for the scrubbing - all EDMAs are configured the same, so
9367 	 * save only the first one's value in order to restore it later.
9368 	 * Also set the SOB address on all EDMA cores for write completion.
9369 	 * Set the QM to test mode (trusted) to let it access physical addresses with MMU bypass.
9370 	 */
9371 	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
9372 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9373 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9374 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9375 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9376 
9377 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9378 				continue;
9379 
9380 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
9381 					edma_offset, mmubp);
9382 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
9383 					lower_32_bits(comp_addr));
9384 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
9385 					upper_32_bits(comp_addr));
9386 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
9387 					comp_val);
9388 			gaudi2_qman_set_test_mode(hdev,
9389 					edma_queues_id[dcore] + 4 * edma_idx, true);
9390 		}
9391 	}
9392 
9393 	WREG32(sob_addr, 0);
9394 
9395 	while (cur_addr < end_addr) {
9396 		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9397 			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9398 				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9399 
9400 				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9401 					continue;
9402 
9403 				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
9404 
9405 				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
9406 					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
9407 					pkt_dma_addr + dma_num * pkt_size,
9408 					edma_queues_id[dcore] + edma_idx * 4,
9409 					chunk_size, cur_addr, val);
9410 				if (rc)
9411 					goto end;
9412 
9413 				dma_num++;
9414 				cur_addr += chunk_size;
9415 				if (cur_addr == end_addr)
9416 					break;
9417 			}
9418 		}
9419 	}
9420 
9421 	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
9422 	if (rc) {
9423 		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
9424 		goto end;
9425 	}
9426 end:
9427 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9428 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9429 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9430 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9431 
9432 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9433 				continue;
9434 
9435 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
9436 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
9437 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
9438 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
9439 			gaudi2_qman_set_test_mode(hdev,
9440 					edma_queues_id[dcore] + 4 * edma_idx, false);
9441 		}
9442 	}
9443 
9444 	WREG32(sob_addr, 0);
9445 	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
9446 
9447 	return rc;
9448 }
9449 
9450 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
9451 {
9452 	int rc;
9453 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9454 	u64 size = prop->dram_end_address - prop->dram_user_base_address;
9455 
9456 	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
9457 
9458 	if (rc)
9459 		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
9460 				prop->dram_user_base_address, size);
9461 	return rc;
9462 }
9463 
9464 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
9465 {
9466 	int rc;
9467 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9468 	u64 val = hdev->memory_scrub_val;
9469 	u64 addr, size;
9470 
9471 	if (!hdev->memory_scrub)
9472 		return 0;
9473 
9474 	/* scrub SRAM */
9475 	addr = prop->sram_user_base_address;
9476 	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
9477 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
9478 			addr, addr + size, val);
9479 	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
9480 	if (rc) {
9481 		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
9482 		return rc;
9483 	}
9484 
9485 	/* scrub DRAM */
9486 	rc = gaudi2_scrub_device_dram(hdev, val);
9487 	if (rc) {
9488 		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
9489 		return rc;
9490 	}
9491 	return 0;
9492 }
9493 
9494 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
9495 {
9496 	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
9497 		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
9498 	u32 val, size, offset;
9499 	int dcore_id;
9500 
9501 	offset = hdev->asic_prop.first_available_cq[0] * 4;
9502 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
9503 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
9504 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
9505 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
9506 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
9507 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
9508 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
9509 			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
9510 
9511 	/* memset dcore0 CQ registers */
9512 	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9513 	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9514 	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9515 	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9516 	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9517 	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9518 
9519 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
9520 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
9521 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
9522 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
9523 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
9524 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
9525 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
9526 
9527 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9528 		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9529 		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9530 		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9531 		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9532 		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9533 		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9534 
9535 		cq_lbw_l_addr += DCORE_OFFSET;
9536 		cq_lbw_h_addr += DCORE_OFFSET;
9537 		cq_lbw_data_addr += DCORE_OFFSET;
9538 		cq_base_l_addr += DCORE_OFFSET;
9539 		cq_base_h_addr += DCORE_OFFSET;
9540 		cq_size_addr += DCORE_OFFSET;
9541 	}
9542 
9543 	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
9544 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
9545 	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
9546 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
9547 
9548 	/* memset dcore0 monitors */
9549 	gaudi2_memset_device_lbw(hdev, addr, size, val);
9550 
9551 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
9552 	gaudi2_memset_device_lbw(hdev, addr, size, 0);
9553 
9554 	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
9555 	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
9556 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
9557 
9558 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9559 		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
9560 		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
9561 		mon_sts_addr += DCORE_OFFSET;
9562 		mon_cfg_addr += DCORE_OFFSET;
9563 	}
9564 
9565 	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9566 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
9567 	val = 0;
9568 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
9569 			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9570 
9571 	/* memset dcore0 sobs */
9572 	gaudi2_memset_device_lbw(hdev, addr, size, val);
9573 
9574 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
9575 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
9576 
9577 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9578 		gaudi2_memset_device_lbw(hdev, addr, size, val);
9579 		addr += DCORE_OFFSET;
9580 	}
9581 
9582 	/* Flush all WREG to prevent race */
9583 	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9584 }
9585 
9586 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
9587 {
9588 	u32 reg_base, hw_queue_id;
9589 
9590 	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
9591 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9592 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9593 			continue;
9594 
9595 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9596 
9597 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9598 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9599 	}
9600 
9601 	/* Flush all WREG to prevent race */
9602 	RREG32(mmPDMA0_QM_ARB_CFG_0);
9603 }
9604 
9605 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
9606 {
9607 	u32 reg_base, hw_queue_id;
9608 
9609 	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
9610 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9611 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9612 			continue;
9613 
9614 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9615 
9616 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9617 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9618 	}
9619 
9620 	/* Flush all WREG to prevent race */
9621 	RREG32(mmPDMA0_QM_ARB_CFG_0);
9622 }
9623 
9624 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
9625 {
9626 	return 0;
9627 }
9628 
9629 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
9630 {
9631 }
9632 
9633 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
9634 						struct dup_block_ctx *cfg_ctx)
9635 {
9636 	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
9637 	u8 seq;
9638 	int i;
9639 
9640 	for (i = 0 ; i < cfg_ctx->instances ; i++) {
9641 		seq = block_idx * cfg_ctx->instances + i;
9642 
9643 		/* skip disabled instance */
9644 		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
9645 			continue;
9646 
9647 		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
9648 					cfg_ctx->data);
9649 	}
9650 }
9651 
9652 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
9653 						u64 mask)
9654 {
9655 	int i;
9656 
9657 	cfg_ctx->enabled_mask = mask;
9658 
9659 	for (i = 0 ; i < cfg_ctx->blocks ; i++)
9660 		gaudi2_init_block_instances(hdev, i, cfg_ctx);
9661 }
9662 
9663 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
9664 {
9665 	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
9666 }
9667 
9668 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
9669 {
9670 	void *host_mem_virtual_addr;
9671 	dma_addr_t host_mem_dma_addr;
9672 	u64 reserved_va_base;
9673 	u32 pos, size_left, size_to_dma;
9674 	struct hl_ctx *ctx;
9675 	int rc = 0;
9676 
9677 	/* Fetch the ctx */
9678 	ctx = hl_get_compute_ctx(hdev);
9679 	if (!ctx) {
9680 		dev_err(hdev->dev, "No ctx available\n");
9681 		return -EINVAL;
9682 	}
9683 
9684 	/* Allocate buffers for read and for poll */
9685 	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
9686 								GFP_KERNEL | __GFP_ZERO);
9687 	if (host_mem_virtual_addr == NULL) {
9688 		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
9689 		rc = -ENOMEM;
9690 		goto put_ctx;
9691 	}
9692 
9693 	/* Reserve VM region on asic side */
9694 	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
9695 						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9696 	if (!reserved_va_base) {
9697 		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
9698 		rc = -ENOMEM;
9699 		goto free_data_buffer;
9700 	}
9701 
9702 	/* Create mapping on asic side */
9703 	mutex_lock(&hdev->mmu_lock);
9704 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
9705 	hl_mmu_invalidate_cache_range(hdev, false,
9706 				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
9707 				      ctx->asid, reserved_va_base, SZ_2M);
9708 	mutex_unlock(&hdev->mmu_lock);
9709 	if (rc) {
9710 		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
9711 		goto unreserve_va;
9712 	}
9713 
9714 	/* Enable MMU on KDMA */
9715 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
9716 
9717 	pos = 0;
9718 	size_left = size;
9719 	size_to_dma = SZ_2M;
9720 
9721 	while (size_left > 0) {
9722 		if (size_left < SZ_2M)
9723 			size_to_dma = size_left;
9724 
9725 		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
9726 		if (rc)
9727 			break;
9728 
9729 		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
9730 
9731 		if (size_left <= SZ_2M)
9732 			break;
9733 
9734 		pos += SZ_2M;
9735 		addr += SZ_2M;
9736 		size_left -= SZ_2M;
9737 	}
9738 
9739 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
9740 
9741 	mutex_lock(&hdev->mmu_lock);
9742 	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
9743 	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
9744 				      ctx->asid, reserved_va_base, SZ_2M);
9745 	mutex_unlock(&hdev->mmu_lock);
9746 unreserve_va:
9747 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
9748 free_data_buffer:
9749 	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
9750 put_ctx:
9751 	hl_ctx_put(ctx);
9752 
9753 	return rc;
9754 }
9755 
9756 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
9757 {
9758 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9759 	int min_alloc_order, rc;
9760 
9761 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9762 		return 0;
9763 
9764 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
9765 								HOST_SPACE_INTERNAL_CB_SZ,
9766 								&hdev->internal_cb_pool_dma_addr,
9767 								GFP_KERNEL | __GFP_ZERO);
9768 
9769 	if (!hdev->internal_cb_pool_virt_addr)
9770 		return -ENOMEM;
9771 
9772 	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
9773 					gaudi2_get_wait_cb_size(hdev)));
9774 
9775 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
9776 	if (!hdev->internal_cb_pool) {
9777 		dev_err(hdev->dev, "Failed to create internal CB pool\n");
9778 		rc = -ENOMEM;
9779 		goto free_internal_cb_pool;
9780 	}
9781 
9782 	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
9783 				HOST_SPACE_INTERNAL_CB_SZ, -1);
9784 	if (rc) {
9785 		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
9786 		rc = -EFAULT;
9787 		goto destroy_internal_cb_pool;
9788 	}
9789 
9790 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
9791 					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9792 
9793 	if (!hdev->internal_cb_va_base) {
9794 		rc = -ENOMEM;
9795 		goto destroy_internal_cb_pool;
9796 	}
9797 
9798 	mutex_lock(&hdev->mmu_lock);
9799 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
9800 					HOST_SPACE_INTERNAL_CB_SZ);
9801 	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
9802 	mutex_unlock(&hdev->mmu_lock);
9803 
9804 	if (rc)
9805 		goto unreserve_internal_cb_pool;
9806 
9807 	return 0;
9808 
9809 unreserve_internal_cb_pool:
9810 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9811 destroy_internal_cb_pool:
9812 	gen_pool_destroy(hdev->internal_cb_pool);
9813 free_internal_cb_pool:
9814 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9815 					hdev->internal_cb_pool_dma_addr);
9816 
9817 	return rc;
9818 }
9819 
9820 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
9821 {
9822 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9823 
9824 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9825 		return;
9826 
9827 	mutex_lock(&hdev->mmu_lock);
9828 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9829 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9830 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
9831 	mutex_unlock(&hdev->mmu_lock);
9832 
9833 	gen_pool_destroy(hdev->internal_cb_pool);
9834 
9835 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9836 					hdev->internal_cb_pool_dma_addr);
9837 }
9838 
9839 static void gaudi2_restore_user_registers(struct hl_device *hdev)
9840 {
9841 	gaudi2_restore_user_sm_registers(hdev);
9842 	gaudi2_restore_user_qm_registers(hdev);
9843 }
9844 
9845 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9846 {
9847 	struct hl_device *hdev = ctx->hdev;
9848 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9849 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9850 	int rc;
9851 
9852 	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9853 				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
9854 	if (rc)
9855 		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
9856 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9857 
9858 	return rc;
9859 }
9860 
9861 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9862 {
9863 	struct hl_device *hdev = ctx->hdev;
9864 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9865 	int rc;
9866 
9867 	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9868 				prop->pmmu.page_size, true);
9869 	if (rc)
9870 		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
9871 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9872 }
9873 
9874 static int gaudi2_ctx_init(struct hl_ctx *ctx)
9875 {
9876 	int rc;
9877 
9878 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
9879 	if (rc)
9880 		return rc;
9881 
9882 	/* If the device has just performed a reset there is no need to clear the
9883 	 * user registers - restore only the NIC QM registers
9884 	 */
9885 	if (ctx->hdev->reset_upon_device_release)
9886 		gaudi2_restore_nic_qm_registers(ctx->hdev);
9887 	else
9888 		gaudi2_restore_user_registers(ctx->hdev);
9889 
9890 	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
9891 	if (rc)
9892 		return rc;
9893 
9894 	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
9895 	if (rc)
9896 		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9897 
9898 	return rc;
9899 }
9900 
9901 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
9902 {
9903 	if (ctx->asid == HL_KERNEL_ASID_ID)
9904 		return;
9905 
9906 	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9907 
9908 	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
9909 }
9910 
9911 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
9912 {
9913 	struct hl_device *hdev = cs->ctx->hdev;
9914 	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
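	/* cyclic slot index; the masking assumes max_pending_cs is a power of 2 */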
9915 	u32 mon_payload, sob_id, mon_id;
9916 
9917 	if (!cs_needs_completion(cs))
9918 		return 0;
9919 
9920 	/*
9921 	 * The first 64 SOB/MON are reserved for the driver's QMAN auto-completion
9922 	 * mechanism. Each SOB/MON pair is used for a pending CS with the same
9923 	 * cyclic index. The SOB value is increased as each of the CS jobs is
9924 	 * completed. When the SOB reaches the number of CS jobs, the monitor
9925 	 * generates an MSI-X interrupt.
9926 	 */
9927 
9928 	sob_id = mon_id = index;
9929 	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
9930 				(1 << CQ_ENTRY_READY_SHIFT) | index;
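	/* e.g. for pending CS slot 5 the monitor writes SHADOW_INDEX_VALID | READY | 5 to the CQ */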
9931 
9932 	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
9933 				cs->jobs_cnt);
9934 
9935 	return 0;
9936 }
9937 
9938 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
9939 {
9940 	return HL_INVALID_QUEUE;
9941 }
9942 
9943 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
9944 {
9945 	struct hl_cb *cb = data;
9946 	struct packet_msg_short *pkt;
9947 	u32 value, ctl, pkt_size = sizeof(*pkt);
9948 
9949 	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
9950 	memset(pkt, 0, pkt_size);
9951 
9952 	/* Inc by 1, Mode ADD */
9953 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
9954 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
9955 
9956 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
9957 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
9958 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9959 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
9960 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9961 
9962 	pkt->value = cpu_to_le32(value);
9963 	pkt->ctl = cpu_to_le32(ctl);
9964 
9965 	return size + pkt_size;
9966 }
9967 
9968 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
9969 {
9970 	u32 ctl, pkt_size = sizeof(*pkt);
9971 
9972 	memset(pkt, 0, pkt_size);
9973 
9974 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
9975 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
9976 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9977 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9978 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
9979 
9980 	pkt->value = cpu_to_le32(value);
9981 	pkt->ctl = cpu_to_le32(ctl);
9982 
9983 	return pkt_size;
9984 }
9985 
9986 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
9987 					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
9988 {
9989 	u32 ctl, value, pkt_size = sizeof(*pkt);
9990 	u8 mask;
9991 
9992 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
9993 		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
9994 		return 0;
9995 	}
9996 
9997 	memset(pkt, 0, pkt_size);
9998 
9999 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
10000 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
10001 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
10002 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
10003 
10004 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10005 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10006 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10007 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10008 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10009 
10010 	pkt->value = cpu_to_le32(value);
10011 	pkt->ctl = cpu_to_le32(ctl);
10012 
10013 	return pkt_size;
10014 }
10015 
10016 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
10017 {
10018 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
10019 
10020 	memset(pkt, 0, pkt_size);
10021 
10022 	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
10023 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
10024 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
10025 
10026 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
10027 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10028 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10029 
10030 	pkt->cfg = cpu_to_le32(cfg);
10031 	pkt->ctl = cpu_to_le32(ctl);
10032 
10033 	return pkt_size;
10034 }
10035 
10036 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
10037 {
10038 	struct hl_cb *cb = prop->data;
10039 	void *buf = (void *) (uintptr_t) (cb->kernel_address);
10040 
10041 	u64 monitor_base, fence_addr = 0;
10042 	u32 stream_index, size = prop->size;
10043 	u16 msg_addr_offset;
10044 
10045 	stream_index = prop->q_idx % 4;
10046 	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
10047 			QM_FENCE2_OFFSET + stream_index * 4;
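	/*
	 * The monitor configured below writes 1 to this QM FENCE2 counter once the
	 * SOB condition is met; the FENCE packet appended at the end of the CB
	 * (fence ID 2, target value 1) blocks the stream until that write arrives.
	 */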
10048 
10049 	/*
10050 	 * monitor_base should be the content of the base0 address registers,
10051 	 * so it will be added to the msg short offsets
10052 	 */
10053 	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
10054 
10055 	/* First monitor config packet: low address of the sync */
10056 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
10057 				monitor_base;
10058 
10059 	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
10060 
10061 	/* Second monitor config packet: high address of the sync */
10062 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
10063 				monitor_base;
10064 
10065 	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
10066 
10067 	/*
10068 	 * Third monitor config packet: the payload, i.e. what to write when the
10069 	 * sync triggers
10070 	 */
10071 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
10072 				monitor_base;
10073 
10074 	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
10075 
10076 	/* Fourth monitor config packet: bind the monitor to a sync object */
10077 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
10078 
10079 	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
10080 						prop->sob_val, msg_addr_offset);
10081 
10082 	/* Fence packet */
10083 	size += gaudi2_add_fence_pkt(buf + size);
10084 
10085 	return size;
10086 }
10087 
10088 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
10089 {
10090 	struct hl_hw_sob *hw_sob = data;
10091 
10092 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
10093 
10094 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
10095 
10096 	kref_init(&hw_sob->kref);
10097 }
10098 
10099 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
10100 {
10101 }
10102 
10103 static u64 gaudi2_get_device_time(struct hl_device *hdev)
10104 {
10105 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
10106 
10107 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
10108 }
10109 
10110 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
10111 {
10112 	return 0;
10113 }
10114 
10115 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
10116 					struct hl_cs *cs, u32 wait_queue_id,
10117 					u32 collective_engine_id, u32 encaps_signal_offset)
10118 {
10119 	return -EINVAL;
10120 }
10121 
10122 /*
10123  * gaudi2_mmu_scramble_addr - converts a DRAM (non power of 2) page-size aligned
10124  *                            address to a DMMU page-size (64MB) address before
10125  *                            mapping it in the MMU.
10126  * The operation is performed on both the virtual and physical addresses.
10127  * For a device with 6 HBMs the scramble is:
10128  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
10129  *
10130  * Example:
10131  * =============================================================================
10132  * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
10133  * Phys address                                                     in MMU last
10134  *                                                                    HOP
10135  * =============================================================================
10136  * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
10137  * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
10138  * =============================================================================
10139  */
10140 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
10141 {
10142 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10143 	u32 divisor, mod_va;
10144 	u64 div_va;
10145 
10146 	/* accept any address in the DRAM address space */
10147 	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
10148 									VA_HBM_SPACE_END)) {
10149 
10150 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10151 		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
10152 		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
10153 			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
10154 			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
10155 	}
10156 
10157 	return raw_addr;
10158 }
10159 
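/*
 * gaudi2_mmu_descramble_addr - the inverse of gaudi2_mmu_scramble_addr.
 * For a device with 6 HBMs the descramble is:
 * (addr[47:0] / 64M) * 48M + addr % 64M + addr[63:48]
 */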
10160 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
10161 {
10162 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10163 	u32 divisor, mod_va;
10164 	u64 div_va;
10165 
10166 	/* accept any address in the DRAM address space */
10167 	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
10168 									VA_HBM_SPACE_END)) {
10169 
10170 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10171 		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
10172 					PAGE_SIZE_64MB, &mod_va);
10173 
10174 		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
10175 					(div_va * divisor + mod_va));
10176 	}
10177 
10178 	return scrambled_addr;
10179 }
10180 
10181 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
10182 {
10183 	u32 base = 0, dcore_id, dec_id;
10184 
10185 	if (core_id >= NUMBER_OF_DEC) {
10186 		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
10187 		goto out;
10188 	}
10189 
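	/* decoders 0-7 belong to the DCOREs (2 per DCORE); the rest are the PCIe shared decoders */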
10190 	if (core_id < 8) {
10191 		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
10192 		dec_id = core_id % NUM_OF_DEC_PER_DCORE;
10193 
10194 		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
10195 				dec_id * DCORE_VDEC_OFFSET;
10196 	} else {
10197 		/* PCIe Shared Decoder */
10198 		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
10199 	}
10200 out:
10201 	return base;
10202 }
10203 
10204 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
10205 				u32 *block_size, u32 *block_id)
10206 {
10207 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10208 	int i;
10209 
10210 	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
10211 		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
10212 			*block_id = i;
10213 			if (block_size)
10214 				*block_size = gaudi2->mapped_blocks[i].size;
10215 			return 0;
10216 		}
10217 	}
10218 
10219 	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
10220 
10221 	return -EINVAL;
10222 }
10223 
10224 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
10225 			u32 block_id, u32 block_size)
10226 {
10227 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10228 	u64 offset_in_bar;
10229 	u64 address;
10230 	int rc;
10231 
10232 	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
10233 		dev_err(hdev->dev, "Invalid block id %u", block_id);
10234 		return -EINVAL;
10235 	}
10236 
10237 	/* we allow mapping only an entire block */
10238 	if (block_size != gaudi2->mapped_blocks[block_id].size) {
10239 		dev_err(hdev->dev, "Invalid block size %u", block_size);
10240 		return -EINVAL;
10241 	}
10242 
10243 	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
10244 
10245 	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
10246 
10247 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
10248 			VM_DONTCOPY | VM_NORESERVE;
10249 
10250 	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
10251 			block_size, vma->vm_page_prot);
10252 	if (rc)
10253 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
10254 
10255 	return rc;
10256 }
10257 
10258 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
10259 {
10260 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10261 
10262 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
10263 	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
10264 
10265 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
10266 		WREG32(irq_handler_offset,
10267 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
10268 }
10269 
10270 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
10271 {
10272 	switch (mmu_id) {
10273 	case HW_CAP_DCORE0_DMMU0:
10274 		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
10275 		break;
10276 	case HW_CAP_DCORE0_DMMU1:
10277 		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
10278 		break;
10279 	case HW_CAP_DCORE0_DMMU2:
10280 		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
10281 		break;
10282 	case HW_CAP_DCORE0_DMMU3:
10283 		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
10284 		break;
10285 	case HW_CAP_DCORE1_DMMU0:
10286 		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
10287 		break;
10288 	case HW_CAP_DCORE1_DMMU1:
10289 		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
10290 		break;
10291 	case HW_CAP_DCORE1_DMMU2:
10292 		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
10293 		break;
10294 	case HW_CAP_DCORE1_DMMU3:
10295 		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
10296 		break;
10297 	case HW_CAP_DCORE2_DMMU0:
10298 		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
10299 		break;
10300 	case HW_CAP_DCORE2_DMMU1:
10301 		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
10302 		break;
10303 	case HW_CAP_DCORE2_DMMU2:
10304 		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
10305 		break;
10306 	case HW_CAP_DCORE2_DMMU3:
10307 		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
10308 		break;
10309 	case HW_CAP_DCORE3_DMMU0:
10310 		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
10311 		break;
10312 	case HW_CAP_DCORE3_DMMU1:
10313 		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
10314 		break;
10315 	case HW_CAP_DCORE3_DMMU2:
10316 		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
10317 		break;
10318 	case HW_CAP_DCORE3_DMMU3:
10319 		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
10320 		break;
10321 	case HW_CAP_PMMU:
10322 		*mmu_base = mmPMMU_HBW_MMU_BASE;
10323 		break;
10324 	default:
10325 		return -EINVAL;
10326 	}
10327 
10328 	return 0;
10329 }
10330 
10331 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
10332 {
10333 	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
10334 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10335 	u32 mmu_base;
10336 
10337 	if (!(gaudi2->hw_cap_initialized & mmu_id))
10338 		return;
10339 
10340 	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
10341 		return;
10342 
10343 	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
10344 	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
10345 }
10346 
10347 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
10348 {
10349 	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
10350 
10351 	/* check all HMMUs */
10352 	for (i = 0 ; i < num_of_hmmus ; i++) {
10353 		mmu_id = HW_CAP_DCORE0_DMMU0 << i;
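		/* the HMMU capability bits are consecutive, starting at HW_CAP_DCORE0_DMMU0 */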
10354 
10355 		if (mmu_cap_mask & mmu_id)
10356 			gaudi2_ack_mmu_error(hdev, mmu_id);
10357 	}
10358 
10359 	/* check PMMU */
10360 	if (mmu_cap_mask & HW_CAP_PMMU)
10361 		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
10362 
10363 	return 0;
10364 }
10365 
10366 static void gaudi2_get_msi_info(__le32 *table)
10367 {
10368 	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
10369 }
10370 
10371 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
10372 {
10373 	switch (pll_idx) {
10374 	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
10375 	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
10376 	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
10377 	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
10378 	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
10379 	case HL_GAUDI2_MME_PLL: return MME_PLL;
10380 	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
10381 	case HL_GAUDI2_IF_PLL: return IF_PLL;
10382 	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
10383 	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
10384 	case HL_GAUDI2_VID_PLL: return VID_PLL;
10385 	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
10386 	default: return -EINVAL;
10387 	}
10388 }
10389 
10390 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
10391 {
10392 	/* Not implemented */
10393 	return 0;
10394 }
10395 
10396 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
10397 {
10398 	/* Not implemented */
10399 	return 0;
10400 }
10401 
10402 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
10403 				struct hl_device *hdev, struct hl_mon_state_dump *mon)
10404 {
10405 	/* Not implemented */
10406 	return 0;
10407 }
10408 
10409 
10410 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
10411 				u64 status_base_offset, enum hl_sync_engine_type engine_type,
10412 				u32 engine_id, char **buf, size_t *size, size_t *offset)
10413 {
10414 	/* Not implemented */
10415 	return 0;
10416 }
10417 
10418 
10419 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
10420 	.monitor_valid = gaudi2_monitor_valid,
10421 	.print_single_monitor = gaudi2_print_single_monitor,
10422 	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
10423 	.print_fences_single_engine = gaudi2_print_fences_single_engine,
10424 };
10425 
10426 static void gaudi2_state_dump_init(struct hl_device *hdev)
10427 {
10428 	/* Not implemented */
10429 	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
10430 	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
10431 }
10432 
10433 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
10434 {
10435 	return 0;
10436 }
10437 
10438 static u32 *gaudi2_get_stream_master_qid_arr(void)
10439 {
10440 	return NULL;
10441 }
10442 
10443 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
10444 				struct attribute_group *dev_vrm_attr_grp)
10445 {
10446 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
10447 	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
10448 }
10449 
10450 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
10451 					u32 page_size, u32 *real_page_size, bool is_dram_addr)
10452 {
10453 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10454 
10455 	/* for host pages the page size must be a multiple of the MMU page size */
10456 	if (!is_dram_addr) {
10457 		if (page_size % mmu_prop->page_size)
10458 			goto page_size_err;
10459 
10460 		*real_page_size = mmu_prop->page_size;
10461 		return 0;
10462 	}
10463 
10464 	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
10465 		goto page_size_err;
10466 
10467 	/*
10468 	 * The MMU page size differs from the DRAM page size (more precisely, the DMMU page is
10469 	 * greater than the DRAM page size).
10470 	 * Therefore, work with the DRAM page size and let the MMU scrambling routine handle this
10471 	 * mismatch when calculating the address to place in the MMU page table.
10472 	 * (The check above already ensures that the dram_page_size is not greater than the
10473 	 * MMU page size.)
10474 	 */
10475 	*real_page_size = prop->dram_page_size;
10476 
10477 	return 0;
10478 
10479 page_size_err:
10480 	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
10481 							page_size, mmu_prop->page_size >> 10);
10482 	return -EFAULT;
10483 }
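
/*
 * Illustrative sketch (not part of the driver): with a hypothetical DRAM page
 * size of 2MB and a DMMU page size of 1GB, a 6MB mapping request passes both
 * DRAM-path checks (6MB is a multiple of 2MB, and 2MB <= 1GB), so the real
 * page size handed back to the mapping code would be 2MB, while a 3MB request
 * is rejected.  The sizes are examples only, not the real gaudi2 properties.
 */
#if 0	/* example only - never compiled into the driver */
#include <stdbool.h>
#include <stdio.h>

static bool example_dram_page_size_ok(unsigned int page_size,
				      unsigned int dram_page_size,
				      unsigned int dmmu_page_size)
{
	/* mirrors the two DRAM-path checks in gaudi2_mmu_get_real_page_size() */
	return !(page_size % dram_page_size) && dram_page_size <= dmmu_page_size;
}

int main(void)
{
	unsigned int dram = 2U << 20, dmmu = 1U << 30;

	printf("6MB ok: %d\n", example_dram_page_size_ok(6U << 20, dram, dmmu)); /* 1 */
	printf("3MB ok: %d\n", example_dram_page_size_ok(3U << 20, dram, dmmu)); /* 0 */
	return 0;
}
#endif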
10484 
10485 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
10486 {
10487 	return -EOPNOTSUPP;
10488 }
10489 
10490 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
10491 {
10492 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10493 
10494 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
10495 		return 0;
10496 
10497 	return hl_fw_send_device_activity(hdev, open);
10498 }
10499 
10500 static const struct hl_asic_funcs gaudi2_funcs = {
10501 	.early_init = gaudi2_early_init,
10502 	.early_fini = gaudi2_early_fini,
10503 	.late_init = gaudi2_late_init,
10504 	.late_fini = gaudi2_late_fini,
10505 	.sw_init = gaudi2_sw_init,
10506 	.sw_fini = gaudi2_sw_fini,
10507 	.hw_init = gaudi2_hw_init,
10508 	.hw_fini = gaudi2_hw_fini,
10509 	.halt_engines = gaudi2_halt_engines,
10510 	.suspend = gaudi2_suspend,
10511 	.resume = gaudi2_resume,
10512 	.mmap = gaudi2_mmap,
10513 	.ring_doorbell = gaudi2_ring_doorbell,
10514 	.pqe_write = gaudi2_pqe_write,
10515 	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
10516 	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
10517 	.scrub_device_mem = gaudi2_scrub_device_mem,
10518 	.scrub_device_dram = gaudi2_scrub_device_dram,
10519 	.get_int_queue_base = NULL,
10520 	.test_queues = gaudi2_test_queues,
10521 	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
10522 	.asic_dma_pool_free = gaudi2_dma_pool_free,
10523 	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
10524 	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
10525 	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
10526 	.asic_dma_map_single = gaudi2_dma_map_single,
10527 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
10528 	.cs_parser = gaudi2_cs_parser,
10529 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
10530 	.add_end_of_cb_packets = NULL,
10531 	.update_eq_ci = gaudi2_update_eq_ci,
10532 	.context_switch = gaudi2_context_switch,
10533 	.restore_phase_topology = gaudi2_restore_phase_topology,
10534 	.debugfs_read_dma = gaudi2_debugfs_read_dma,
10535 	.add_device_attr = gaudi2_add_device_attr,
10536 	.handle_eqe = gaudi2_handle_eqe,
10537 	.get_events_stat = gaudi2_get_events_stat,
10538 	.read_pte = NULL,
10539 	.write_pte = NULL,
10540 	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
10541 	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
10542 	.mmu_prefetch_cache_range = NULL,
10543 	.send_heartbeat = gaudi2_send_heartbeat,
10544 	.debug_coresight = gaudi2_debug_coresight,
10545 	.is_device_idle = gaudi2_is_device_idle,
10546 	.compute_reset_late_init = gaudi2_compute_reset_late_init,
10547 	.hw_queues_lock = gaudi2_hw_queues_lock,
10548 	.hw_queues_unlock = gaudi2_hw_queues_unlock,
10549 	.get_pci_id = gaudi2_get_pci_id,
10550 	.get_eeprom_data = gaudi2_get_eeprom_data,
10551 	.get_monitor_dump = gaudi2_get_monitor_dump,
10552 	.send_cpu_message = gaudi2_send_cpu_message,
10553 	.pci_bars_map = gaudi2_pci_bars_map,
10554 	.init_iatu = gaudi2_init_iatu,
10555 	.rreg = hl_rreg,
10556 	.wreg = hl_wreg,
10557 	.halt_coresight = gaudi2_halt_coresight,
10558 	.ctx_init = gaudi2_ctx_init,
10559 	.ctx_fini = gaudi2_ctx_fini,
10560 	.pre_schedule_cs = gaudi2_pre_schedule_cs,
10561 	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
10562 	.load_firmware_to_device = NULL,
10563 	.load_boot_fit_to_device = NULL,
10564 	.get_signal_cb_size = gaudi2_get_signal_cb_size,
10565 	.get_wait_cb_size = gaudi2_get_wait_cb_size,
10566 	.gen_signal_cb = gaudi2_gen_signal_cb,
10567 	.gen_wait_cb = gaudi2_gen_wait_cb,
10568 	.reset_sob = gaudi2_reset_sob,
10569 	.reset_sob_group = gaudi2_reset_sob_group,
10570 	.get_device_time = gaudi2_get_device_time,
10571 	.pb_print_security_errors = gaudi2_pb_print_security_errors,
10572 	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
10573 	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
10574 	.get_dec_base_addr = gaudi2_get_dec_base_addr,
10575 	.scramble_addr = gaudi2_mmu_scramble_addr,
10576 	.descramble_addr = gaudi2_mmu_descramble_addr,
10577 	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
10578 	.get_hw_block_id = gaudi2_get_hw_block_id,
10579 	.hw_block_mmap = gaudi2_block_mmap,
10580 	.enable_events_from_fw = gaudi2_enable_events_from_fw,
10581 	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
10582 	.get_msi_info = gaudi2_get_msi_info,
10583 	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
10584 	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
10585 	.init_firmware_loader = gaudi2_init_firmware_loader,
10586 	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
10587 	.state_dump_init = gaudi2_state_dump_init,
10588 	.get_sob_addr = gaudi2_get_sob_addr,
10589 	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
10590 	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
10591 	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
10592 	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
10593 	.access_dev_mem = hl_access_dev_mem,
10594 	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
10595 	.set_engine_cores = gaudi2_set_engine_cores,
10596 	.send_device_activity = gaudi2_send_device_activity,
10597 	.set_dram_properties = gaudi2_set_dram_properties,
10598 };
10599 
10600 void gaudi2_set_asic_funcs(struct hl_device *hdev)
10601 {
10602 	hdev->asic_funcs = &gaudi2_funcs;
10603 }
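
/*
 * Illustrative sketch (not part of the driver): once gaudi2_set_asic_funcs()
 * has installed the table above, common habanalabs code is expected to reach
 * ASIC-specific behaviour only through hdev->asic_funcs, for example:
 */
#if 0	/* example only - never compiled into the driver */
static int example_notify_device_activity(struct hl_device *hdev, bool open)
{
	/* the indirect call lands in gaudi2_send_device_activity() on Gaudi2 */
	return hdev->asic_funcs->send_device_activity(hdev, open);
}
#endif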
10604