1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2020-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/hw_ip/mmu/mmu_general.h"
11 #include "../include/hw_ip/mmu/mmu_v2_0.h"
12 #include "../include/gaudi2/gaudi2_packets.h"
13 #include "../include/gaudi2/gaudi2_reg_map.h"
14 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
15 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
16 
17 #include <linux/module.h>
18 #include <linux/pci.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 
22 #define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */
23 
24 #define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
25 #define GAUDI2_RESET_POLL_TIMEOUT_USEC		50000		/* 50ms */
26 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
27 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
28 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
29 #define GAUDI2_RESET_POLL_CNT			3
30 #define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
31 #define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
32 #define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
33 #define GAUDI2_CB_POOL_CB_CNT			512
34 #define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
35 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
36 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
37 #define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
38 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
39 
40 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3
41 
42 /*
43  * The code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
44  * and it relies on that value (for array sizes etc.), so we define a separate value
45  * for the maximum number of faulty TPCs that reflects the cluster binning requirements.
46  */
47 #define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
48 #define MAX_FAULTY_XBARS			1
49 #define MAX_FAULTY_EDMAS			1
50 #define MAX_FAULTY_DECODERS			1
51 
52 #define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
53 #define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
54 #define GAUDI2_DECODER_FULL_MASK		0x3FF
55 
56 #define GAUDI2_NA_EVENT_CAUSE			0xFF
57 #define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
58 #define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
59 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
60 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
61 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
62 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
63 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
64 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE		30
65 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
66 #define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
67 #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
68 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
69 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
70 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
71 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
72 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
73 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
74 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
75 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
76 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
77 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5
78 
79 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
80 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
81 #define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)
82 
83 #define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
84 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)
85 
86 #define KDMA_TIMEOUT_USEC			USEC_PER_SEC
87 
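/*
 * Engine idle-check helpers: each macro tests an engine's raw status register value(s)
 * against the relevant idle mask(s); IS_DEC_IDLE also treats the PEND work state as idle.
 */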
88 #define IS_DMA_IDLE(dma_core_idle_ind_mask)	\
89 	(!((dma_core_idle_ind_mask) &		\
90 	((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \
91 	(DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK))))
92 
93 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
94 
95 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))
96 
97 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
98 	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
99 	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
100 	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))
101 
102 #define PCIE_DEC_EN_MASK			0x300
103 #define DEC_WORK_STATE_IDLE			0
104 #define DEC_WORK_STATE_PEND			3
105 #define IS_DEC_IDLE(dec_swreg15) \
106 	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
107 	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) ==  DEC_WORK_STATE_PEND)
108 
109 /* HBM MMU address scrambling parameters */
110 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
111 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
112 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
113 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
114 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
115 #define MMU_RANGE_INV_VA_LSB_SHIFT		12
116 #define MMU_RANGE_INV_VA_MSB_SHIFT		44
117 #define MMU_RANGE_INV_EN_SHIFT			0
118 #define MMU_RANGE_INV_ASID_EN_SHIFT		1
119 #define MMU_RANGE_INV_ASID_SHIFT		2
120 
121 /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU
122  * because it has only a 2-entry FIFO, and hence this cause is not enabled for the PMMU.
123  */
124 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
125 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
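/* With GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE == 19, these evaluate to bits [17:0] for the PMMU
 * and bits [18:0] for the HMMUs.
 */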
126 
127 #define GAUDI2_MAX_STRING_LEN			64
128 
129 #define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
130 							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
131 
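/* Engine-ID stride between two consecutive DCOREs in the gaudi2 engine ID enumeration */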
132 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
133 
134 enum hl_pmmu_fatal_cause {
135 	LATENCY_RD_OUT_FIFO_OVERRUN,
136 	LATENCY_WR_OUT_FIFO_OVERRUN,
137 };
138 
139 enum hl_pcie_drain_ind_cause {
140 	LBW_AXI_DRAIN_IND,
141 	HBW_AXI_DRAIN_IND
142 };
143 
144 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
145 	[HBM_ID0] = 0xFFFC,
146 	[HBM_ID1] = 0xFFCF,
147 	[HBM_ID2] = 0xF7F7,
148 	[HBM_ID3] = 0x7F7F,
149 	[HBM_ID4] = 0xFCFF,
150 	[HBM_ID5] = 0xCFFF,
151 };
152 
153 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
154 	[0] = HBM_ID0,
155 	[1] = HBM_ID1,
156 	[2] = HBM_ID4,
157 	[3] = HBM_ID5,
158 };
159 
160 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
161 	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
162 	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
163 	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
164 	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
165 	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
166 	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
167 	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
168 	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
169 };
170 
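/*
 * Async event ID reported by the queue manager of each hardware queue.
 * The four queues of a QMAN all map to the same event.
 */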
171 static const int gaudi2_qman_async_event_id[] = {
172 	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
173 	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
174 	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
175 	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
176 	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
177 	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
178 	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
179 	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
180 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
181 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
182 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
183 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
184 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
185 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
186 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
187 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
188 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
189 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
190 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
191 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
192 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
193 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
194 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
195 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
196 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
197 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
198 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
199 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
200 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
201 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
202 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
203 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
204 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
205 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
206 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
207 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
208 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
209 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
210 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
211 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
212 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
213 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
214 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
215 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
216 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
217 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
218 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
219 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
220 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
221 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
222 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
223 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
224 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
225 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
226 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
227 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
228 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
229 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
230 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
231 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
232 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
233 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
234 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
235 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
236 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
237 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
238 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
239 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
240 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
241 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
242 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
243 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
244 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
245 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
246 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
247 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
248 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
249 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
250 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
251 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
252 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
253 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
254 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
255 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
256 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
257 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
258 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
259 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
260 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
261 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
262 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
263 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
264 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
265 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
266 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
267 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
268 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
269 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
270 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
271 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
272 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
273 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
274 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
275 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
276 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
277 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
278 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
279 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
280 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
281 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
282 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
283 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
284 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
285 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
286 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
287 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
288 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
289 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
290 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
291 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
292 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
293 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
294 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
295 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
296 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
297 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
298 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
299 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
300 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
301 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
302 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
303 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
304 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
305 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
306 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
307 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
308 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
309 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
310 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
311 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
312 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
313 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
314 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
315 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
316 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
317 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
318 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
319 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
320 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
321 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
322 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
323 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
324 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
325 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
326 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
327 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
328 	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
329 	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
330 	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
331 	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
332 	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
333 	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
334 	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
335 	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
336 	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
337 	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
338 	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
339 	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
340 	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
341 	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
342 	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
343 	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
344 	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
345 	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
346 	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
347 	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
348 	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
349 	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
350 	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
351 	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
352 	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
353 	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
354 	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
355 	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
356 	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
357 	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
358 	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
359 	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
360 	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
361 	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
362 	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
363 	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
364 	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
365 	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
366 	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
367 	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
368 	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
369 	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
370 	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
371 	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
372 	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
373 	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
374 	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
375 	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
376 	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
377 	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
378 	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
379 	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
380 	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
381 	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
382 	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
383 	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
384 	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
385 	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
386 	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
387 	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
388 	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
389 	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
390 	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
391 	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
392 	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
393 	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
394 	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
395 	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
396 	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
397 	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
398 	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
399 	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
400 	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
401 	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
402 	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
403 	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
404 	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
405 	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
406 	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
407 	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
408 	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
409 	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
410 	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
411 	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
412 	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
413 	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
414 	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
415 	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
416 	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
417 	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
418 	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
419 	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
420 	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
421 	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
422 	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
423 	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
424 	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
425 	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
426 	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
427 	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
428 	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
429 	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
430 	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
431 	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
432 };
433 
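/* Async event ID reported by each DMA core (EDMA/PDMA/KDMA) */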
434 static const int gaudi2_dma_core_async_event_id[] = {
435 	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
436 	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
437 	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
438 	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
439 	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
440 	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
441 	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
442 	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
443 	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
444 	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
445 	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
446 };
447 
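/*
 * Human-readable names of the individual error/interrupt cause bits, indexed by cause bit.
 * Each array is sized by the matching GAUDI2_NUM_OF_*_CAUSE definition above.
 */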
448 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
449 	"qman sei intr",
450 	"arc sei intr"
451 };
452 
453 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
454 	"AXI_TERMINATOR WR",
455 	"AXI_TERMINATOR RD",
456 	"AXI SPLIT SEI Status"
457 };
458 
459 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
460 	"cbu_bresp_sei_intr_cause",
461 	"cbu_rresp_sei_intr_cause",
462 	"lbu_bresp_sei_intr_cause",
463 	"lbu_rresp_sei_intr_cause",
464 	"cbu_axi_split_intr_cause",
465 	"lbu_axi_split_intr_cause",
466 	"arc_ip_excptn_sei_intr_cause",
467 	"dmi_bresp_sei_intr_cause",
468 	"aux2apb_err_sei_intr_cause",
469 	"cfg_lbw_wr_terminated_intr_cause",
470 	"cfg_lbw_rd_terminated_intr_cause",
471 	"cfg_dccm_wr_terminated_intr_cause",
472 	"cfg_dccm_rd_terminated_intr_cause",
473 	"cfg_hbw_rd_terminated_intr_cause"
474 };
475 
476 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
477 	"msix_vcd_hbw_sei",
478 	"msix_l2c_hbw_sei",
479 	"msix_nrm_hbw_sei",
480 	"msix_abnrm_hbw_sei",
481 	"msix_vcd_lbw_sei",
482 	"msix_l2c_lbw_sei",
483 	"msix_nrm_lbw_sei",
484 	"msix_abnrm_lbw_sei",
485 	"apb_vcd_lbw_sei",
486 	"apb_l2c_lbw_sei",
487 	"apb_nrm_lbw_sei",
488 	"apb_abnrm_lbw_sei",
489 	"dec_sei",
490 	"dec_apb_sei",
491 	"trc_apb_sei",
492 	"lbw_mstr_if_sei",
493 	"axi_split_bresp_err_sei",
494 	"hbw_axi_wr_viol_sei",
495 	"hbw_axi_rd_viol_sei",
496 	"lbw_axi_wr_viol_sei",
497 	"lbw_axi_rd_viol_sei",
498 	"vcd_spi",
499 	"l2c_spi",
500 	"nrm_spi",
501 	"abnrm_spi",
502 };
503 
504 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
505 	"PQ AXI HBW error",
506 	"CQ AXI HBW error",
507 	"CP AXI HBW error",
508 	"CP error due to undefined OPCODE",
509 	"CP encountered STOP OPCODE",
510 	"CP AXI LBW error",
511 	"CP WRREG32 or WRBULK returned error",
512 	"N/A",
513 	"FENCE 0 inc over max value and clipped",
514 	"FENCE 1 inc over max value and clipped",
515 	"FENCE 2 inc over max value and clipped",
516 	"FENCE 3 inc over max value and clipped",
517 	"FENCE 0 dec under min value and clipped",
518 	"FENCE 1 dec under min value and clipped",
519 	"FENCE 2 dec under min value and clipped",
520 	"FENCE 3 dec under min value and clipped",
521 	"CPDMA Up overflow",
522 	"PQC L2H error"
523 };
524 
525 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
526 	"RSVD0",
527 	"CQ AXI HBW error",
528 	"CP AXI HBW error",
529 	"CP error due to undefined OPCODE",
530 	"CP encountered STOP OPCODE",
531 	"CP AXI LBW error",
532 	"CP WRREG32 or WRBULK returned error",
533 	"N/A",
534 	"FENCE 0 inc over max value and clipped",
535 	"FENCE 1 inc over max value and clipped",
536 	"FENCE 2 inc over max value and clipped",
537 	"FENCE 3 inc over max value and clipped",
538 	"FENCE 0 dec under min value and clipped",
539 	"FENCE 1 dec under min value and clipped",
540 	"FENCE 2 dec under min value and clipped",
541 	"FENCE 3 dec under min value and clipped",
542 	"CPDMA Up overflow",
543 	"RSVD17",
544 	"CQ_WR_IFIFO_CI_ERR",
545 	"CQ_WR_CTL_CI_ERR",
546 	"ARC_CQF_RD_ERR",
547 	"ARC_CQ_WR_IFIFO_CI_ERR",
548 	"ARC_CQ_WR_CTL_CI_ERR",
549 	"ARC_AXI_ERR",
550 	"CP_SWITCH_WDT_ERR"
551 };
552 
553 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
554 	"Choice push while full error",
555 	"Choice Q watchdog error",
556 	"MSG AXI LBW returned with error"
557 };
558 
559 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
560 	"qm_axi_err",
561 	"qm_trace_fence_events",
562 	"qm_sw_err",
563 	"qm_cp_sw_stop",
564 	"lbw_mstr_rresp_err",
565 	"lbw_mstr_bresp_err",
566 	"lbw_msg_slverr",
567 	"hbw_msg_slverr",
568 	"wbc_slverr",
569 	"hbw_mstr_rresp_err",
570 	"hbw_mstr_bresp_err",
571 	"sb_resp_intr",
572 	"mrsb_resp_intr",
573 	"core_dw_status_0",
574 	"core_dw_status_1",
575 	"core_dw_status_2",
576 	"core_dw_status_3",
577 	"core_dw_status_4",
578 	"core_dw_status_5",
579 	"core_dw_status_6",
580 	"core_dw_status_7",
581 	"async_arc2cpu_sei_intr",
582 };
583 
584 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
585 	"tpc_address_exceed_slm",
586 	"tpc_div_by_0",
587 	"tpc_spu_mac_overflow",
588 	"tpc_spu_addsub_overflow",
589 	"tpc_spu_abs_overflow",
590 	"tpc_spu_fma_fp_dst_nan",
591 	"tpc_spu_fma_fp_dst_inf",
592 	"tpc_spu_convert_fp_dst_nan",
593 	"tpc_spu_convert_fp_dst_inf",
594 	"tpc_spu_fp_dst_denorm",
595 	"tpc_vpu_mac_overflow",
596 	"tpc_vpu_addsub_overflow",
597 	"tpc_vpu_abs_overflow",
598 	"tpc_vpu_convert_fp_dst_nan",
599 	"tpc_vpu_convert_fp_dst_inf",
600 	"tpc_vpu_fma_fp_dst_nan",
601 	"tpc_vpu_fma_fp_dst_inf",
602 	"tpc_vpu_fp_dst_denorm",
603 	"tpc_assertions",
604 	"tpc_illegal_instruction",
605 	"tpc_pc_wrap_around",
606 	"tpc_qm_sw_err",
607 	"tpc_hbw_rresp_err",
608 	"tpc_hbw_bresp_err",
609 	"tpc_lbw_rresp_err",
610 	"tpc_lbw_bresp_err",
611 	"st_unlock_already_locked",
612 	"invalid_lock_access",
613 	"LD_L protection violation",
614 	"ST_L protection violation",
615 };
616 
617 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
618 	"agu_resp_intr",
619 	"qman_axi_err",
620 	"wap sei (wbc axi err)",
621 	"arc sei",
622 	"cfg access error",
623 	"qm_sw_err",
624 	"sbte_dbg_intr_0",
625 	"sbte_dbg_intr_1",
626 	"sbte_dbg_intr_2",
627 	"sbte_dbg_intr_3",
628 	"sbte_dbg_intr_4",
629 	"sbte_prtn_intr_0",
630 	"sbte_prtn_intr_1",
631 	"sbte_prtn_intr_2",
632 	"sbte_prtn_intr_3",
633 	"sbte_prtn_intr_4",
634 };
635 
636 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
637 	"i0",
638 	"i1",
639 	"i2",
640 	"i3",
641 	"i4",
642 };
643 
644 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
645 	"WBC ERR RESP_0",
646 	"WBC ERR RESP_1",
647 	"AP SOURCE POS INF",
648 	"AP SOURCE NEG INF",
649 	"AP SOURCE NAN",
650 	"AP RESULT POS INF",
651 	"AP RESULT NEG INF",
652 };
653 
654 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
655 	"HBW Read returned with error RRESP",
656 	"HBW write returned with error BRESP",
657 	"LBW write returned with error BRESP",
658 	"descriptor_fifo_overflow",
659 	"KDMA SB LBW Read returned with error",
660 	"KDMA WBC LBW Write returned with error",
661 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
662 	"WRONG CFG FOR COMMIT IN LIN DMA"
663 };
664 
665 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
666 	"HBW/LBW Read returned with error RRESP",
667 	"HBW/LBW write returned with error BRESP",
668 	"LBW write returned with error BRESP",
669 	"descriptor_fifo_overflow",
670 	"KDMA SB LBW Read returned with error",
671 	"KDMA WBC LBW Write returned with error",
672 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
673 	"WRONG CFG FOR COMMIT IN LIN DMA"
674 };
675 
676 struct gaudi2_sm_sei_cause_data {
677 	const char *cause_name;
678 	const char *log_name;
679 };
680 
681 static const struct gaudi2_sm_sei_cause_data
682 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
683 	{"calculated SO value overflow/underflow", "SOB ID"},
684 	{"payload address of monitor is not aligned to 4B", "monitor addr"},
685 	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
686 };
687 
688 static const char * const
689 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
690 	"LATENCY_RD_OUT_FIFO_OVERRUN",
691 	"LATENCY_WR_OUT_FIFO_OVERRUN",
692 };
693 
694 static const char * const
695 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
696 	"LATENCY_RD_OUT_FIFO_OVERRUN",
697 	"LATENCY_WR_OUT_FIFO_OVERRUN",
698 };
699 
700 static const char * const
701 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
702 	"AXI drain HBW",
703 	"AXI drain LBW",
704 };
705 
706 static const char * const
707 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
708 	"HBW error response",
709 	"LBW error response",
710 	"TLP is blocked by RR"
711 };
712 
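/*
 * MMIO base address of the QMAN block serving each hardware queue; the four queues of a
 * QMAN share the same base.
 */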
713 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
714 	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
715 	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
716 	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
717 	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
718 	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
719 	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
720 	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
721 	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
722 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
723 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
724 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
725 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
726 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
727 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
728 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
729 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
730 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
731 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
732 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
733 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
734 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
735 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
736 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
737 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
738 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
739 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
740 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
741 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
742 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
743 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
744 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
745 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
746 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
747 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
748 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
749 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
750 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
751 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
752 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
753 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
754 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
755 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
756 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
757 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
758 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
759 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
760 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
761 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
762 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
763 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
764 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
765 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
766 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
767 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
768 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
769 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
770 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
771 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
772 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
773 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
774 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
775 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
776 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
777 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
778 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
779 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
780 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
781 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
782 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
783 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
784 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
785 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
786 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
787 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
788 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
789 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
790 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
791 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
792 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
793 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
794 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
795 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
796 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
797 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
798 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
799 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
800 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
801 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
802 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
803 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
804 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
805 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
806 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
807 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
808 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
809 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
810 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
811 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
812 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
813 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
814 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
815 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
816 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
817 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
818 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
819 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
820 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
821 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
822 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
823 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
824 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
825 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
826 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
827 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
828 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
829 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
830 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
831 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
832 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
833 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
834 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
835 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
836 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
837 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
838 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
839 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
840 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
841 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
842 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
843 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
844 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
845 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
846 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
847 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
848 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
849 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
850 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
851 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
852 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
853 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
854 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
855 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
856 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
857 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
858 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
859 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
860 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
861 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
862 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
863 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
864 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
865 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
866 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
867 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
868 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
869 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
870 	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
871 	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
872 	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
873 	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
874 	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
875 	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
876 	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
877 	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
878 	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
879 	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
880 	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
881 	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
882 	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
883 	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
884 	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
885 	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
886 	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
887 	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
888 	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
889 	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
890 	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
891 	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
892 	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
893 	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
894 	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
895 	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
896 	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
897 	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
898 	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
899 	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
900 	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
901 	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
902 	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
903 	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
904 	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
905 	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
906 	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
907 	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
908 	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
909 	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
910 	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
911 	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
912 	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
913 	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
914 	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
915 	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
916 	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
917 	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
918 	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
919 	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
920 	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
921 	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
922 	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
923 	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
924 	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
925 	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
926 	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
927 	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
928 	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
929 	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
930 	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
931 	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
932 	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
933 	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
934 	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
935 	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
936 	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
937 	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
938 	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
939 	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
940 	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
941 	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
942 	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
943 	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
944 	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
945 	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
946 	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
947 	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
948 	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
949 	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
950 	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
951 	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
952 	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
953 	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
954 	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
955 	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
956 	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
957 	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
958 	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
959 	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
960 	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
961 	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
962 	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
963 	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
964 	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
965 	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
966 	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
967 	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
968 	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
969 	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
970 	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
971 	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
972 	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
973 	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
974 };
975 
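/* AUX register block base address of each ARC CPU */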
976 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
977 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
978 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
979 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
980 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
981 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
982 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
983 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
984 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
985 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
986 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
987 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
988 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
989 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
990 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
991 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
992 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
993 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
994 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
995 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
996 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
997 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
998 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
999 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1000 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1001 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1002 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1003 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1004 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1005 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1006 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1007 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1008 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1009 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1010 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1011 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1012 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1013 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1014 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1015 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1016 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1017 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1018 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1019 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1020 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1021 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1022 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1023 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1024 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1025 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1026 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1027 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1028 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1029 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1030 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1031 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1032 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1033 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1034 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1035 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1036 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1037 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1038 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1039 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1040 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1041 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1042 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1043 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1044 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1045 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1046 };
1047 
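/* DCCM (data closely-coupled memory) base address of each ARC CPU */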
1048 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1049 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1050 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1051 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1052 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1053 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1054 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1055 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1056 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1057 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1058 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1059 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1060 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1061 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1062 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1063 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1064 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1065 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1066 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1067 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1068 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1069 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1070 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1071 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1072 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1073 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1074 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1075 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1076 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1077 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1078 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1079 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1080 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1081 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1082 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1083 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1084 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1085 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1086 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1087 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1088 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1089 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1090 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1091 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1092 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1093 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1094 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1095 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1096 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1097 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1098 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1099 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1100 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1101 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1102 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1103 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1104 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1105 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1106 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1107 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1108 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1109 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1110 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1111 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1112 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1113 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1114 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1115 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1116 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1117 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1118 };
1119 
1120 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1121 	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1122 	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1123 	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1124 	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1125 };
1126 
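/* ARC CPU associated with each hardware queue's QMAN */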
1127 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1128 	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1129 	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1130 	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1131 	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1132 	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1133 	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1134 	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1135 	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1136 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1137 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1138 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1139 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1140 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1141 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1142 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1143 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1144 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1145 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1146 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1147 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1148 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1149 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1150 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1151 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1152 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1153 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1154 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1155 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1156 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1157 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1158 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1159 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1160 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1161 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1162 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1163 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1164 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1165 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1166 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1167 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1168 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1169 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1170 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1171 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1172 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1173 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1174 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1175 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1176 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1177 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1178 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1179 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1180 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1181 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1182 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1183 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1184 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1185 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1186 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1187 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1188 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1189 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1190 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1191 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1192 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1193 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1194 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1195 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1196 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1197 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1198 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1199 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1200 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1201 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1202 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1203 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1204 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1205 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1206 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1207 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1208 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1209 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1210 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1211 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1212 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1213 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1214 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1215 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1216 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1217 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1218 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1219 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1220 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1221 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1222 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1223 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1224 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1225 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1226 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1227 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1228 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1229 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1230 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1231 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1232 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1233 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1234 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1235 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1236 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1237 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1238 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1239 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1240 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1241 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1242 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1243 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1244 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1245 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1246 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1247 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1248 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1249 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1250 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1251 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1252 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1253 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1254 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1255 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1256 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1257 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1258 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1259 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1260 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1261 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1262 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1263 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1264 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1265 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1266 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1267 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1268 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1269 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1270 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1271 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1272 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1273 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1274 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1275 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1276 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1277 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1278 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1279 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1280 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1281 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1282 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1283 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1284 	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1285 	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1286 	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1287 	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1288 	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1289 	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1290 	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1291 	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1292 	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1293 	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1294 	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1295 	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1296 	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1297 	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1298 	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1299 	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1300 	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1301 	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1302 	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1303 	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1304 	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1305 	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1306 	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1307 	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1308 	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1309 	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1310 	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1311 	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1312 	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1313 	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1314 	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1315 	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1316 	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1317 	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1318 	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1319 	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1320 	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1321 	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1322 	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1323 	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1324 	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1325 	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1326 	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1327 	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1328 	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1329 	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1330 	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1331 	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1332 	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1333 	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1334 	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1335 	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1336 	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1337 	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1338 	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1339 	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1340 	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1341 	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1342 	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1343 	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1344 	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1345 	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1346 	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1347 	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1348 	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1349 	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1350 	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1351 	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1352 	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1353 	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1354 	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1355 	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1356 	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1357 	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1358 	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1359 	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1360 	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1361 	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1362 	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1363 	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1364 	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1365 	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1366 	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1367 	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1368 	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1369 	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1370 	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1371 	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1372 	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1373 	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1374 	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1375 	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1376 	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1377 	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1378 	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1379 	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1380 	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1381 	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1382 	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1383 	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1384 	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1385 	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1386 	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1387 	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1388 };
1389 
1390 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1391 	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1392 	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1393 	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1394 	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1395 	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1396 	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1397 	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1398 	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1399 	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1400 	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1401 	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1402 };
1403 
1404 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1405 	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1406 	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1407 	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1408 	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1409 };
1410 
1411 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1412 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1413 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1414 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1415 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1416 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1417 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1418 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1419 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1420 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1421 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1422 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1423 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1424 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1425 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1426 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1427 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1428 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1429 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1430 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1431 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1432 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1433 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1434 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1435 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1436 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1437 };
1438 
1439 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1440 	[ROTATOR_ID_0] = mmROT0_BASE,
1441 	[ROTATOR_ID_1] = mmROT1_BASE
1442 };
1443 
1444 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1445 	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1446 	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1447 	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1448 	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1449 	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1450 	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1451 	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1452 	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1453 	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1454 	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1455 	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1456 	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1457 	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1458 	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1459 	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1460 	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1461 	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1462 	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1463 	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1464 	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1465 	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1466 	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1467 	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1468 	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1469 	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1470 };
1471 
1472 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1473 	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1474 	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1475 };
1476 
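/*
 * Stream-0 (base) queue ID of every EDMA QMAN, ordered by DCORE and then by
 * EDMA instance within the DCORE.
 */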
1477 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1478 	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1479 	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1480 	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1481 	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1482 	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1483 	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1484 	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1485 	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1486 };
1487 
1488 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1489 	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1490 	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1491 	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1492 	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1493 	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1494 	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1495 	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1496 	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1497 	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1498 	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1499 };
1500 
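/*
 * (x, y) grid coordinates of each router, indexed by router ID
 * (NUM_OF_RTR_PER_DCORE routers per DCORE). Indices 24-27 have no matching
 * router and hold (0, 0) placeholders.
 */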
1501 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = {
1502 	RTR_ID_X_Y(2, 4),
1503 	RTR_ID_X_Y(3, 4),
1504 	RTR_ID_X_Y(4, 4),
1505 	RTR_ID_X_Y(5, 4),
1506 	RTR_ID_X_Y(6, 4),
1507 	RTR_ID_X_Y(7, 4),
1508 	RTR_ID_X_Y(8, 4),
1509 	RTR_ID_X_Y(9, 4),
1510 	RTR_ID_X_Y(10, 4),
1511 	RTR_ID_X_Y(11, 4),
1512 	RTR_ID_X_Y(12, 4),
1513 	RTR_ID_X_Y(13, 4),
1514 	RTR_ID_X_Y(14, 4),
1515 	RTR_ID_X_Y(15, 4),
1516 	RTR_ID_X_Y(16, 4),
1517 	RTR_ID_X_Y(17, 4),
1518 	RTR_ID_X_Y(2, 11),
1519 	RTR_ID_X_Y(3, 11),
1520 	RTR_ID_X_Y(4, 11),
1521 	RTR_ID_X_Y(5, 11),
1522 	RTR_ID_X_Y(6, 11),
1523 	RTR_ID_X_Y(7, 11),
1524 	RTR_ID_X_Y(8, 11),
1525 	RTR_ID_X_Y(9, 11),
1526 	RTR_ID_X_Y(0, 0),/* 24 no id */
1527 	RTR_ID_X_Y(0, 0),/* 25 no id */
1528 	RTR_ID_X_Y(0, 0),/* 26 no id */
1529 	RTR_ID_X_Y(0, 0),/* 27 no id */
1530 	RTR_ID_X_Y(14, 11),
1531 	RTR_ID_X_Y(15, 11),
1532 	RTR_ID_X_Y(16, 11),
1533 	RTR_ID_X_Y(17, 11)
1534 };
1535 
1536 enum rtr_id {
1537 	DCORE0_RTR0,
1538 	DCORE0_RTR1,
1539 	DCORE0_RTR2,
1540 	DCORE0_RTR3,
1541 	DCORE0_RTR4,
1542 	DCORE0_RTR5,
1543 	DCORE0_RTR6,
1544 	DCORE0_RTR7,
1545 	DCORE1_RTR0,
1546 	DCORE1_RTR1,
1547 	DCORE1_RTR2,
1548 	DCORE1_RTR3,
1549 	DCORE1_RTR4,
1550 	DCORE1_RTR5,
1551 	DCORE1_RTR6,
1552 	DCORE1_RTR7,
1553 	DCORE2_RTR0,
1554 	DCORE2_RTR1,
1555 	DCORE2_RTR2,
1556 	DCORE2_RTR3,
1557 	DCORE2_RTR4,
1558 	DCORE2_RTR5,
1559 	DCORE2_RTR6,
1560 	DCORE2_RTR7,
1561 	DCORE3_RTR0,
1562 	DCORE3_RTR1,
1563 	DCORE3_RTR2,
1564 	DCORE3_RTR3,
1565 	DCORE3_RTR4,
1566 	DCORE3_RTR5,
1567 	DCORE3_RTR6,
1568 	DCORE3_RTR7,
1569 };
1570 
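/*
 * Router that initiates traffic for each TPC, indexed by TPC sequence number
 * (NUM_OF_TPC_PER_DCORE per DCORE for all DCOREs). The extra last entry is
 * the PCI TPC (DCORE0_TPC6), which goes through DCORE0_RTR0.
 */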
1571 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1572 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1573 	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1574 	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1575 	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1576 	DCORE0_RTR0
1577 };
1578 
1579 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = {
1580 	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1581 	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1582 };
1583 
1584 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = {
1585 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1586 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1587 };
1588 
1589 struct sft_info {
1590 	u8 interface_id;
1591 	u8 dcore_id;
1592 };
1593 
1594 static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1595 	{0, 0},	{1, 0}, {0, 1}, {1, 1}, {1, 2}, {1, 3},	{0, 2},	{0, 3},
1596 };
1597 
1598 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = {
1599 	DCORE0_RTR0, DCORE0_RTR0
1600 };
1601 
1602 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = {
1603 	DCORE2_RTR0, DCORE3_RTR7
1604 };
1605 
1606 struct mme_initiators_rtr_id {
1607 	u32 wap0;
1608 	u32 wap1;
1609 	u32 write;
1610 	u32 read;
1611 	u32 sbte0;
1612 	u32 sbte1;
1613 	u32 sbte2;
1614 	u32 sbte3;
1615 	u32 sbte4;
1616 };
1617 
1618 enum mme_initiators {
1619 	MME_WAP0 = 0,
1620 	MME_WAP1,
1621 	MME_WRITE,
1622 	MME_READ,
1623 	MME_SBTE0,
1624 	MME_SBTE1,
1625 	MME_SBTE2,
1626 	MME_SBTE3,
1627 	MME_SBTE4,
1628 	MME_INITIATORS_MAX
1629 };
1630 
1631 static const struct mme_initiators_rtr_id
1632 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1633 	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1634 	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1635 	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1636 	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1637 	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1638 	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1639 	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1640 	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1641 };
1642 
1643 enum razwi_event_sources {
1644 	RAZWI_TPC,
1645 	RAZWI_MME,
1646 	RAZWI_EDMA,
1647 	RAZWI_PDMA,
1648 	RAZWI_NIC,
1649 	RAZWI_DEC,
1650 	RAZWI_ROT
1651 };
1652 
1653 struct hbm_mc_error_causes {
1654 	u32 mask;
1655 	char cause[50];
1656 };
1657 
1658 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
1659 	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
1660 	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
1661 	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
1662 	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
1663 	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
1664 };
1665 
1666 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
1667 	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
1668 	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
1669 	[HBM_SEI_READ_ERR] = "SEI read data error",
1670 	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
1671 	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
1672 	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
1673 	[HBM_SEI_DFI] = "SEI DFI error",
1674 	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
1675 	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
1676 };
1677 
1678 struct mmu_spi_sei_cause {
1679 	char cause[50];
1680 	int clear_bit;
1681 };
1682 
1683 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
1684 	{"page fault", 1},		/* INTERRUPT_CLR[1] */
1685 	{"page access", 1},		/* INTERRUPT_CLR[1] */
1686 	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
1687 	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
1688 	{"mmu rei0", -1},		/* no clear register bit */
1689 	{"mmu rei1", -1},		/* no clear register bit */
1690 	{"stlb rei0", -1},		/* no clear register bit */
1691 	{"stlb rei1", -1},		/* no clear register bit */
1692 	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
1693 	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
1694 	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
1695 	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
1696 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1697 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1698 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1699 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1700 	{"slave error", 16},		/* INTERRUPT_CLR[16] */
1701 	{"dec error", 17},		/* INTERRUPT_CLR[17] */
1702 	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
1703 };
1704 
1705 struct gaudi2_cache_invld_params {
1706 	u64 start_va;
1707 	u64 end_va;
1708 	u32 inv_start_val;
1709 	u32 flags;
1710 	bool range_invalidation;
1711 };
1712 
1713 struct gaudi2_tpc_idle_data {
1714 	struct engines_data *e;
1715 	unsigned long *mask;
1716 	bool *is_idle;
1717 	const char *tpc_fmt;
1718 };
1719 
1720 struct gaudi2_tpc_mmu_data {
1721 	u32 rw_asid;
1722 };
1723 
1724 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
1725 
1726 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
1727 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
1728 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
1729 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1730 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1731 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
1732 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
1733 										bool is_memset);
1734 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
1735 
1736 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
1737 {
1738 
1739 }
1740 
1741 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
1742 {
1743 	return sizeof(struct packet_msg_short);
1744 }
1745 
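/*
 * A wait CB is composed of four MSG_SHORT packets followed by a single FENCE
 * packet, hence the size calculated below.
 */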
1746 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
1747 {
1748 	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
1749 }
1750 
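/*
 * Iterate over all enabled TPCs, including the PCI TPC (DCORE0_TPC6), and
 * invoke ctx->fn() for each one. Iteration stops on the first error reported
 * through ctx->rc.
 */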
1751 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
1752 {
1753 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1754 	int dcore, inst, tpc_seq;
1755 	u32 offset;
1756 
1757 	/* init the return code */
1758 	ctx->rc = 0;
1759 
1760 	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
1761 		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
1762 			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
1763 
1764 			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
1765 				continue;
1766 
1767 			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
1768 
1769 			ctx->fn(hdev, dcore, inst, offset, ctx);
1770 			if (ctx->rc) {
1771 				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
1772 							dcore, inst);
1773 				return;
1774 			}
1775 		}
1776 	}
1777 
1778 	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
1779 		return;
1780 
1781 	/* special check for PCI TPC (DCORE0_TPC6) */
1782 	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
1783 	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
1784 	if (ctx->rc)
1785 		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
1786 }
1787 
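/*
 * A host physical address is valid if it falls within the first host physical
 * range or at/above the base of the second one, i.e. it must not land in the
 * hole between HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0 and HOST_PHYS_BASE_1.
 */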
1788 static bool gaudi2_host_phys_addr_valid(u64 addr)
1789 {
1790 	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
1791 		return true;
1792 
1793 	return false;
1794 }
1795 
1796 static int set_number_of_functional_hbms(struct hl_device *hdev)
1797 {
1798 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1799 	u8 faulty_hbms = hweight64(hdev->dram_binning);
1800 
1801 	/* check if all HBMs should be used */
1802 	if (!faulty_hbms) {
1803 		dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
1804 		prop->num_functional_hbms = GAUDI2_HBM_NUM;
1805 		return 0;
1806 	}
1807 
1808 	/*
1809 	 * check for the error condition in which the number of binning
1810 	 * candidates is higher than the maximum supported by the
1811 	 * driver (in which case the supplied binning mask is rejected
1812 	 * and an error is returned)
1813 	 */
1814 	if (faulty_hbms > MAX_FAULTY_HBMS) {
1815 		dev_err(hdev->dev,
1816 			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
1817 			MAX_FAULTY_HBMS, hdev->dram_binning);
1818 		return -EINVAL;
1819 	}
1820 
1821 	/*
1822 	 * otherwise, subtract the number of faulty HBMs from the total
1823 	 * to get the number of functional HBMs
1824 	 */
1825 	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
1826 	return 0;
1827 }
1828 
1829 static int gaudi2_set_dram_properties(struct hl_device *hdev)
1830 {
1831 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1832 	u32 basic_hbm_page_size;
1833 	int rc;
1834 
1835 	rc = set_number_of_functional_hbms(hdev);
1836 	if (rc)
1837 		return -EINVAL;
1838 
1839 	/*
1840 	 * Due to a HW bug in which the TLB size is x16 smaller than expected, we use a
1841 	 * workaround of an x16 bigger page size, so the entire HBM mapping can be
1842 	 * populated in the TLB
1843 	 */
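	/*
	 * Worked example (assuming GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR is the
	 * x16 factor mentioned above): with all 6 HBMs functional, the basic HBM
	 * page is 6 * 8MB = 48MB and the effective DRAM page size becomes
	 * 16 * 48MB = 768MB.
	 */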
1844 	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
1845 	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
1846 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
1847 	prop->dram_size = prop->num_functional_hbms * SZ_16G;
1848 	prop->dram_base_address = DRAM_PHYS_BASE;
1849 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
1850 	prop->dram_supports_virtual_memory = true;
1851 
1852 	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
1853 	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
1854 	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
1855 	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
1856 
1857 	/* since the DRAM page size differs from the DMMU page size we need to allocate
1858 	 * DRAM memory in units of dram_page_size and map this memory in
1859 	 * units of DMMU page size. We overcome this size mismatch using a
1860 	 * scrambling routine which takes a DRAM page and converts it to a DMMU
1861 	 * page.
1862 	 * We therefore:
1863 	 * 1. partition the virtual address space to DRAM-page (whole) pages.
1864 	 *    (suppose we get n such pages)
1865 	 * 2. limit the amount of virtual address space we got from 1 above to
1866 	 *    a multiple of 64M as we don't want the scrambled address to cross
1867 	 *    the DRAM virtual address space.
1868 	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
1869 	 * 3. determine the end address accordingly
1870 	 *    end_addr = start_addr + m * 48M
1871 	 *
1872 	 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
1873 	 */
1874 	prop->dmmu.start_addr = prop->dram_base_address +
1875 			(prop->dram_page_size *
1876 				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
1877 
1878 	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
1879 			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
1880 
1881 	return 0;
1882 }
1883 
1884 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
1885 {
1886 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1887 	struct hw_queue_properties *q_props;
1888 	u32 num_sync_stream_queues = 0;
1889 	int i;
1890 
1891 	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
1892 	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
1893 					GFP_KERNEL);
1894 
1895 	if (!prop->hw_queues_props)
1896 		return -ENOMEM;
1897 
1898 	q_props = prop->hw_queues_props;
1899 
1900 	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
1901 		q_props[i].type = QUEUE_TYPE_HW;
1902 		q_props[i].driver_only = 0;
1903 
1904 		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
1905 			q_props[i].supports_sync_stream = 0;
1906 		} else {
1907 			q_props[i].supports_sync_stream = 1;
1908 			num_sync_stream_queues++;
1909 		}
1910 
1911 		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
1912 	}
1913 
1914 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
1915 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
1916 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
1917 
1918 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
1919 	prop->cfg_base_address = CFG_BASE;
1920 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
1921 	prop->host_base_address = HOST_PHYS_BASE_0;
1922 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
1923 	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
1924 	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
1925 	prop->user_dec_intr_count = NUMBER_OF_DEC;
1926 	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
1927 	prop->completion_mode = HL_COMPLETION_MODE_CS;
1928 	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
1929 	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
1930 
1931 	prop->sram_base_address = SRAM_BASE_ADDR;
1932 	prop->sram_size = SRAM_SIZE;
1933 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
1934 	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
1935 
1936 	prop->hints_range_reservation = true;
1937 
1938 	if (hdev->pldm)
1939 		prop->mmu_pgt_size = 0x800000; /* 8MB */
1940 	else
1941 		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
1942 
1943 	prop->mmu_pte_size = HL_PTE_SIZE;
1944 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
1945 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
1946 
1947 	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
1948 	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
1949 	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
1950 	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
1951 	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
1952 	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
1953 	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
1954 	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
1955 	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
1956 	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
1957 	prop->dmmu.page_size = PAGE_SIZE_1GB;
1958 	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
1959 	prop->dmmu.last_mask = LAST_MASK;
1960 	prop->dmmu.host_resident = 1;
1961 	/* TODO: will be duplicated until implementing per-MMU props */
1962 	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
1963 	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1964 
1965 	/*
1966 	 * this is done in order to be able to validate the FW descriptor (i.e. to validate
1967 	 * that the addresses and the allocated space for the FW image do not cross memory
1968 	 * bounds). For this reason we set the DRAM size to the minimum possible, and later
1969 	 * it will be modified according to what is reported in the cpucp info packet
1970 	 */
1971 	prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
1972 
1973 	hdev->pmmu_huge_range = true;
1974 	prop->pmmu.host_resident = 1;
1975 	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
1976 	prop->pmmu.last_mask = LAST_MASK;
1977 	/* TODO: will be duplicated until implementing per-MMU props */
1978 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
1979 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1980 
1981 	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
1982 	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
1983 	prop->hints_host_hpage_reserved_va_range.start_addr =
1984 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
1985 	prop->hints_host_hpage_reserved_va_range.end_addr =
1986 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
1987 
1988 	if (PAGE_SIZE == SZ_64K) {
1989 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
1990 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
1991 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
1992 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
1993 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
1994 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
1995 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
1996 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
1997 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
1998 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
1999 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2000 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2001 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2002 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2003 		prop->pmmu.page_size = PAGE_SIZE_64KB;
2004 
2005 		/* shifts and masks are the same in PMMU and HPMMU */
2006 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2007 		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2008 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2009 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2010 	} else {
2011 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2012 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2013 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2014 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2015 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2016 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2017 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2018 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2019 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2020 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2021 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2022 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2023 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2024 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2025 		prop->pmmu.page_size = PAGE_SIZE_4KB;
2026 
2027 		/* shifts and masks are the same in PMMU and HPMMU */
2028 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2029 		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2030 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2031 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2032 	}
2033 
2034 	prop->num_engine_cores = CPU_ID_MAX;
2035 	prop->cfg_size = CFG_SIZE;
2036 	prop->max_asid = MAX_ASID;
2037 	prop->num_of_events = GAUDI2_EVENT_SIZE;
2038 
2039 	prop->dc_power_default = DC_POWER_DEFAULT;
2040 
2041 	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2042 	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2043 	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2044 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2045 
2046 	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2047 
2048 	prop->mme_master_slave_mode = 1;
2049 
2050 	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2051 					(num_sync_stream_queues * HL_RSVD_SOBS);
2052 
2053 	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2054 					(num_sync_stream_queues * HL_RSVD_MONS);
2055 
2056 	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2057 
2058 	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2059 
2060 	prop->fw_cpu_boot_dev_sts0_valid = false;
2061 	prop->fw_cpu_boot_dev_sts1_valid = false;
2062 	prop->hard_reset_done_by_fw = false;
2063 	prop->gic_interrupts_enable = true;
2064 
2065 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2066 
2067 	prop->max_dec = NUMBER_OF_DEC;
2068 
2069 	prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2070 
2071 	prop->dma_mask = 64;
2072 
2073 	prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2074 
2075 	return 0;
2076 }
2077 
2078 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2079 {
2080 	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2081 	bool is_wc[3] = {false, false, true};
2082 	int rc;
2083 
2084 	rc = hl_pci_bars_map(hdev, name, is_wc);
2085 	if (rc)
2086 		return rc;
2087 
2088 	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2089 
2090 	return 0;
2091 }
2092 
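/*
 * Point the DRAM BAR at @addr and return the address the BAR previously
 * pointed at. Returns U64_MAX if the BAR cannot be repositioned (e.g. when
 * the iATU was configured by the FW) or if setting the inbound region fails.
 */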
2093 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2094 {
2095 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2096 	struct hl_inbound_pci_region pci_region;
2097 	u64 old_addr = addr;
2098 	int rc;
2099 
2100 	if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2101 		return old_addr;
2102 
2103 	if (hdev->asic_prop.iatu_done_by_fw)
2104 		return U64_MAX;
2105 
2106 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2107 	pci_region.mode = PCI_BAR_MATCH_MODE;
2108 	pci_region.bar = DRAM_BAR_ID;
2109 	pci_region.addr = addr;
2110 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2111 	if (rc)
2112 		return U64_MAX;
2113 
2114 	if (gaudi2) {
2115 		old_addr = gaudi2->dram_bar_cur_addr;
2116 		gaudi2->dram_bar_cur_addr = addr;
2117 	}
2118 
2119 	return old_addr;
2120 }
2121 
2122 static int gaudi2_init_iatu(struct hl_device *hdev)
2123 {
2124 	struct hl_inbound_pci_region inbound_region;
2125 	struct hl_outbound_pci_region outbound_region;
2126 	u32 bar_addr_low, bar_addr_high;
2127 	int rc;
2128 
2129 	if (hdev->asic_prop.iatu_done_by_fw)
2130 		return 0;
2131 
2132 	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
2133 	 * We must map this region in BAR match mode in order to
2134 	 * fetch BAR physical base address
2135 	 */
2136 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2137 	inbound_region.bar = SRAM_CFG_BAR_ID;
2138 	/* Base address must be aligned to Bar size which is 256 MB */
2139 	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2140 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2141 	if (rc)
2142 		return rc;
2143 
2144 	/* Fetch physical BAR address */
2145 	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2146 	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2147 
2148 	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2149 
2150 	/* Inbound Region 0 - Bar 0 - Point to CFG */
2151 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2152 	inbound_region.bar = SRAM_CFG_BAR_ID;
2153 	inbound_region.offset_in_bar = 0;
2154 	inbound_region.addr = STM_FLASH_BASE_ADDR;
2155 	inbound_region.size = CFG_REGION_SIZE;
2156 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2157 	if (rc)
2158 		return rc;
2159 
2160 	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2161 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2162 	inbound_region.bar = SRAM_CFG_BAR_ID;
2163 	inbound_region.offset_in_bar = CFG_REGION_SIZE;
2164 	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2165 	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2166 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2167 	if (rc)
2168 		return rc;
2169 
2170 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2171 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2172 	inbound_region.bar = DRAM_BAR_ID;
2173 	inbound_region.addr = DRAM_PHYS_BASE;
2174 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2175 	if (rc)
2176 		return rc;
2177 
2178 	/* Outbound Region 0 - Point to Host */
2179 	outbound_region.addr = HOST_PHYS_BASE_0;
2180 	outbound_region.size = HOST_PHYS_SIZE_0;
2181 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2182 
2183 	return rc;
2184 }
2185 
2186 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2187 {
2188 	return RREG32(mmHW_STATE);
2189 }
2190 
2191 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2192 {
2193 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2194 
2195 	/*
2196 	 * check for error condition in which number of binning candidates
2197 	 * is higher than the maximum supported by the driver
2198 	 */
2199 	if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2200 		dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2201 					MAX_CLUSTER_BINNING_FAULTY_TPCS,
2202 					hdev->tpc_binning);
2203 		return -EINVAL;
2204 	}
2205 
2206 	prop->tpc_binning_mask = hdev->tpc_binning;
2207 	prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2208 
2209 	return 0;
2210 }
2211 
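/*
 * Apply TPC binning: for each binned TPC, a substitute TPC (DCORE0_TPC6
 * first, then DCORE3_TPC5) is taken out of the enabled mask and its queues
 * are marked as binned.
 */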
2212 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2213 {
2214 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2215 	struct hw_queue_properties *q_props = prop->hw_queues_props;
2216 	u64 tpc_binning_mask;
2217 	u8 subst_idx = 0;
2218 	int i, rc;
2219 
2220 	rc = gaudi2_tpc_binning_init_prop(hdev);
2221 	if (rc)
2222 		return rc;
2223 
2224 	tpc_binning_mask = prop->tpc_binning_mask;
2225 
2226 	for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2227 		u8 subst_seq, binned, qid_base;
2228 
2229 		if (tpc_binning_mask == 0)
2230 			break;
2231 
2232 		if (subst_idx == 0) {
2233 			subst_seq = TPC_ID_DCORE0_TPC6;
2234 			qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2235 		} else {
2236 			subst_seq = TPC_ID_DCORE3_TPC5;
2237 			qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2238 		}
2239 
2240 
2241 		/* clear bit from mask */
2242 		binned = __ffs(tpc_binning_mask);
2243 		/*
2244 		 * Coverity complains about possible out-of-bound access in
2245 		 * clear_bit
2246 		 */
2247 		if (binned >= TPC_ID_SIZE) {
2248 			dev_err(hdev->dev,
2249 				"Invalid binned TPC (binning mask: %llx)\n",
2250 				tpc_binning_mask);
2251 			return -EINVAL;
2252 		}
2253 		clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2254 
2255 		/* also clear replacing TPC bit from enabled mask */
2256 		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2257 
2258 		/* bin the substitute TPC's queues */
2259 		q_props[qid_base].binned = 1;
2260 		q_props[qid_base + 1].binned = 1;
2261 		q_props[qid_base + 2].binned = 1;
2262 		q_props[qid_base + 3].binned = 1;
2263 
2264 		subst_idx++;
2265 	}
2266 
2267 	return 0;
2268 }
2269 
2270 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2271 {
2272 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2273 	u8 num_faulty;
2274 
2275 	num_faulty = hweight32(hdev->decoder_binning);
2276 
2277 	/*
2278 	 * check for error condition in which number of binning candidates
2279 	 * is higher than the maximum supported by the driver
2280 	 */
2281 	if (num_faulty > MAX_FAULTY_DECODERS) {
2282 		dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2283 						hdev->decoder_binning);
2284 		return -EINVAL;
2285 	}
2286 
2287 	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2288 
2289 	if (prop->decoder_binning_mask)
2290 		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2291 	else
2292 		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2293 
2294 	return 0;
2295 }
2296 
2297 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2298 {
2299 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2300 
2301 	/* check if we should override default binning */
2302 	if (!hdev->dram_binning) {
2303 		prop->dram_binning_mask = 0;
2304 		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2305 		return;
2306 	}
2307 
2308 	/* set DRAM binning constraints */
2309 	prop->faulty_dram_cluster_map |= hdev->dram_binning;
2310 	prop->dram_binning_mask = hdev->dram_binning;
2311 	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2312 }
2313 
2314 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2315 {
2316 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2317 	struct hw_queue_properties *q_props;
2318 	u8 seq, num_faulty;
2319 
2320 	num_faulty = hweight32(hdev->edma_binning);
2321 
2322 	/*
2323 	 * check for error condition in which number of binning candidates
2324 	 * is higher than the maximum supported by the driver
2325 	 */
2326 	if (num_faulty > MAX_FAULTY_EDMAS) {
2327 		dev_err(hdev->dev,
2328 			"EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2329 			hdev->edma_binning);
2330 		return -EINVAL;
2331 	}
2332 
2333 	if (!hdev->edma_binning) {
2334 		prop->edma_binning_mask = 0;
2335 		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2336 		return 0;
2337 	}
2338 
2339 	seq = __ffs((unsigned long)hdev->edma_binning);
2340 
2341 	/* set binning constraints */
2342 	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2343 	prop->edma_binning_mask = hdev->edma_binning;
2344 	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2345 
2346 	/* bin the substitute EDMA's queues */
2347 	q_props = prop->hw_queues_props;
2348 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2349 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2350 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2351 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2352 
2353 	return 0;
2354 }
2355 
2356 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2357 {
2358 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2359 	u8 num_faulty, seq;
2360 
2361 	/* check if we should override default binning */
2362 	if (!xbar_edge_iso_mask) {
2363 		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2364 		return 0;
2365 	}
2366 
2367 	/*
2368 	 * note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2369 	 * only the FW can set a redundancy value). For the user it will always be 0.
2370 	 */
2371 	num_faulty = hweight32(xbar_edge_iso_mask);
2372 
2373 	/*
2374 	 * check for error condition in which number of binning candidates
2375 	 * is higher than the maximum supported by the driver
2376 	 */
2377 	if (num_faulty > MAX_FAULTY_XBARS) {
2378 		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2379 									MAX_FAULTY_XBARS);
2380 		return -EINVAL;
2381 	}
2382 
2383 	seq = __ffs((unsigned long)xbar_edge_iso_mask);
2384 
2385 	/* set binning constraints */
2386 	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2387 	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2388 
2389 	return 0;
2390 }
2391 
2392 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2393 {
2394 	int rc;
2395 
2396 	/*
2397 	 * mark all clusters as good, each component will "fail" a cluster
2398 	 * based on eFuse/user values.
2399 	 * If more than a single cluster is faulty - the chip is unusable
2400 	 */
2401 	hdev->asic_prop.faulty_dram_cluster_map = 0;
2402 
2403 	gaudi2_set_dram_binning_masks(hdev);
2404 
2405 	rc = gaudi2_set_edma_binning_masks(hdev);
2406 	if (rc)
2407 		return rc;
2408 
2409 	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2410 	if (rc)
2411 		return rc;
2412 
2413 
2414 	/* always initially set to full mask */
2415 	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2416 
2417 	return 0;
2418 }
2419 
2420 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2421 {
2422 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2423 	int rc;
2424 
2425 	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2426 	if (rc)
2427 		return rc;
2428 
2429 	/* if we have DRAM binning reported by the FW we should perform cluster config */
2430 	if (prop->faulty_dram_cluster_map) {
2431 		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2432 
2433 		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2434 	}
2435 
2436 	return 0;
2437 }
2438 
2439 static int gaudi2_set_binning_masks(struct hl_device *hdev)
2440 {
2441 	int rc;
2442 
2443 	rc = gaudi2_set_cluster_binning_masks(hdev);
2444 	if (rc)
2445 		return rc;
2446 
2447 	rc = gaudi2_set_tpc_binning_masks(hdev);
2448 	if (rc)
2449 		return rc;
2450 
2451 	rc = gaudi2_set_dec_binning_masks(hdev);
2452 	if (rc)
2453 		return rc;
2454 
2455 	return 0;
2456 }
2457 
2458 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2459 {
2460 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2461 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2462 	long max_power;
2463 	u64 dram_size;
2464 	int rc;
2465 
2466 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2467 		return 0;
2468 
2469 	/* No point in asking for this information again when not doing a hard reset, as
2470 	 * the device CPU hasn't been reset
2471 	 */
2472 	if (hdev->reset_info.in_compute_reset)
2473 		return 0;
2474 
2475 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2476 										mmCPU_BOOT_ERR1);
2477 	if (rc)
2478 		return rc;
2479 
2480 	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2481 	if (dram_size) {
2482 		/* we can have either 5 or 6 HBMs. Other values are invalid */
2483 
2484 		if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2485 					(dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2486 			dev_err(hdev->dev,
2487 				"F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2488 				dram_size, prop->dram_size);
2489 			dram_size = prop->dram_size;
2490 		}
2491 
2492 		prop->dram_size = dram_size;
2493 		prop->dram_end_address = prop->dram_base_address + dram_size;
2494 	}
2495 
2496 	if (!strlen(prop->cpucp_info.card_name))
2497 		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2498 
2499 	/* Overwrite binning masks with the actual binning values from F/W */
2500 	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2501 	hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2502 	hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2503 	hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2504 
2505 	/*
2506 	 * at this point the DRAM parameters need to be updated according to data obtained
2507 	 * from the FW
2508 	 */
2509 	rc = hdev->asic_funcs->set_dram_properties(hdev);
2510 	if (rc)
2511 		return rc;
2512 
2513 	rc = hdev->asic_funcs->set_binning_masks(hdev);
2514 	if (rc)
2515 		return rc;
2516 
2517 	max_power = hl_fw_get_max_power(hdev);
2518 	if (max_power < 0)
2519 		return max_power;
2520 
2521 	prop->max_power_default = (u64) max_power;
2522 
2523 	return 0;
2524 }
2525 
2526 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2527 {
2528 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2529 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2530 	int rc;
2531 
2532 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2533 		return 0;
2534 
2535 	rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2536 	if (rc)
2537 		return rc;
2538 
2539 	hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2540 
2541 	return 0;
2542 }
2543 
2544 static int gaudi2_early_init(struct hl_device *hdev)
2545 {
2546 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2547 	struct pci_dev *pdev = hdev->pdev;
2548 	resource_size_t pci_bar_size;
2549 	int rc;
2550 
2551 	rc = gaudi2_set_fixed_properties(hdev);
2552 	if (rc)
2553 		return rc;
2554 
2555 	/* Check BAR sizes */
2556 	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2557 
2558 	if (pci_bar_size != CFG_BAR_SIZE) {
2559 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2560 			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2561 		rc = -ENODEV;
2562 		goto free_queue_props;
2563 	}
2564 
2565 	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2566 	if (pci_bar_size != MSIX_BAR_SIZE) {
2567 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2568 			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2569 		rc = -ENODEV;
2570 		goto free_queue_props;
2571 	}
2572 
2573 	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2574 	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2575 
2576 	/*
2577 	 * Only in pldm does the driver configure the iATU; otherwise the F/W does it
2578 	 */
2579 	if (hdev->pldm)
2580 		hdev->asic_prop.iatu_done_by_fw = false;
2581 	else
2582 		hdev->asic_prop.iatu_done_by_fw = true;
2583 
2584 	rc = hl_pci_init(hdev);
2585 	if (rc)
2586 		goto free_queue_props;
2587 
2588 	/* Before continuing with the initialization, we need to read the preboot
2589 	 * version to determine whether we run with security-enabled firmware
2590 	 */
2591 	rc = hl_fw_read_preboot_status(hdev);
2592 	if (rc) {
2593 		if (hdev->reset_on_preboot_fail)
2594 			hdev->asic_funcs->hw_fini(hdev, true, false);
2595 		goto pci_fini;
2596 	}
2597 
2598 	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2599 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2600 		hdev->asic_funcs->hw_fini(hdev, true, false);
2601 	}
2602 
2603 	return 0;
2604 
2605 pci_fini:
2606 	hl_pci_fini(hdev);
2607 free_queue_props:
2608 	kfree(hdev->asic_prop.hw_queues_props);
2609 	return rc;
2610 }
2611 
2612 static int gaudi2_early_fini(struct hl_device *hdev)
2613 {
2614 	kfree(hdev->asic_prop.hw_queues_props);
2615 	hl_pci_fini(hdev);
2616 
2617 	return 0;
2618 }
2619 
2620 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
2621 {
2622 	switch (arc_id) {
2623 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
2624 		return true;
2625 	default:
2626 		return false;
2627 	}
2628 }
2629 
2630 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
2631 {
2632 	switch (arc_id) {
2633 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
2634 		return true;
2635 	default:
2636 		return false;
2637 	}
2638 }
2639 
2640 static void gaudi2_init_arcs(struct hl_device *hdev)
2641 {
2642 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2643 	u64 arc_id;
2644 	u32 i;
2645 
2646 	for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
2647 		if (gaudi2_is_arc_enabled(hdev, i))
2648 			continue;
2649 
2650 		gaudi2_set_arc_id_cap(hdev, i);
2651 	}
2652 
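	/* Visit one queue per QMAN; each QMAN exposes 4 PQs, hence the stride of 4 */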
2653 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
2654 		if (!gaudi2_is_queue_enabled(hdev, i))
2655 			continue;
2656 
2657 		arc_id = gaudi2_queue_id_to_arc_id[i];
2658 		if (gaudi2_is_arc_enabled(hdev, arc_id))
2659 			continue;
2660 
2661 		if (gaudi2_is_arc_nic_owned(arc_id) &&
2662 				!(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
2663 			continue;
2664 
2665 		if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
2666 							BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
2667 			continue;
2668 
2669 		gaudi2_set_arc_id_cap(hdev, arc_id);
2670 	}
2671 }
2672 
2673 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
2674 {
2675 	u32 reg_base, reg_val;
2676 	int rc;
2677 
2678 	switch (cpu_id) {
2679 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
2680 		/* Each ARC scheduler has 2 consecutive DCCM blocks */
2681 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2682 						ARC_DCCM_BLOCK_SIZE * 2, true);
2683 		if (rc)
2684 			return rc;
2685 		break;
2686 	case CPU_ID_SCHED_ARC4:
2687 	case CPU_ID_SCHED_ARC5:
2688 	case CPU_ID_MME_QMAN_ARC0:
2689 	case CPU_ID_MME_QMAN_ARC1:
2690 		reg_base = gaudi2_arc_blocks_bases[cpu_id];
2691 
2692 		/* Scrub lower DCCM block */
2693 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2694 						ARC_DCCM_BLOCK_SIZE, true);
2695 		if (rc)
2696 			return rc;
2697 
2698 		/* Switch to upper DCCM block */
2699 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
2700 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2701 
2702 		/* Scrub upper DCCM block */
2703 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2704 						ARC_DCCM_BLOCK_SIZE, true);
2705 		if (rc)
2706 			return rc;
2707 
2708 		/* Switch to lower DCCM block */
2709 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
2710 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2711 		break;
2712 	default:
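		/* The remaining ARCs have a single DCCM block to scrub */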
2713 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2714 						ARC_DCCM_BLOCK_SIZE, true);
2715 		if (rc)
2716 			return rc;
2717 	}
2718 
2719 	return 0;
2720 }
2721 
2722 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
2723 {
2724 	u16 arc_id;
2725 
2726 	for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
2727 		if (!gaudi2_is_arc_enabled(hdev, arc_id))
2728 			continue;
2729 
2730 		gaudi2_scrub_arc_dccm(hdev, arc_id);
2731 	}
2732 }
2733 
2734 static int gaudi2_late_init(struct hl_device *hdev)
2735 {
2736 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2737 	int rc;
2738 
2739 	hdev->asic_prop.supports_advanced_cpucp_rc = true;
2740 
2741 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
2742 					gaudi2->virt_msix_db_dma_addr);
2743 	if (rc) {
2744 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
2745 		return rc;
2746 	}
2747 
2748 	rc = gaudi2_fetch_psoc_frequency(hdev);
2749 	if (rc) {
2750 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
2751 		goto disable_pci_access;
2752 	}
2753 
2754 	gaudi2_init_arcs(hdev);
2755 	gaudi2_scrub_arcs_dccm(hdev);
2756 	gaudi2_init_security(hdev);
2757 
2758 	return 0;
2759 
2760 disable_pci_access:
2761 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2762 
2763 	return rc;
2764 }
2765 
2766 static void gaudi2_late_fini(struct hl_device *hdev)
2767 {
2768 	hl_hwmon_release_resources(hdev);
2769 }
2770 
2771 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
2772 {
2773 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2774 
2775 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2776 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2777 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2778 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2779 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2780 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2781 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2782 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2783 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2784 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2785 }
2786 
2787 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
2788 {
2789 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2790 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2791 	u32 block_size, umr_start_idx, num_umr_blocks;
2792 	int i;
2793 
2794 	for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
2795 		if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
2796 			block_size = ARC_DCCM_BLOCK_SIZE * 2;
2797 		else
2798 			block_size = ARC_DCCM_BLOCK_SIZE;
2799 
2800 		blocks[i].address = gaudi2_arc_dccm_bases[i];
2801 		blocks[i].size = block_size;
2802 	}
2803 
2804 	blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
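	/* ACP engine blocks of the ARC farm and MME QMAN ARCs follow the DCCM blocks */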
2805 	blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
2806 
2807 	blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
2808 	blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
2809 
2810 	blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
2811 	blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
2812 
2813 	blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
2814 	blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
2815 
2816 	blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
2817 	blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
2818 
2819 	blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
2820 	blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
2821 
2822 	blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
2823 	blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
2824 
2825 	blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
2826 	blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
2827 
2828 	umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
2829 	num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
2830 	for (i = 0 ; i < num_umr_blocks ; i++) {
2831 		u8 nic_id, umr_block_id;
2832 
2833 		nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
2834 		umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
2835 
2836 		blocks[umr_start_idx + i].address =
2837 			mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
2838 			(nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
2839 			(nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
2840 			umr_block_id * NIC_UMR_OFFSET;
2841 		blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
2842 	}
2843 
2844 	/* Expose decoder HW configuration block to user */
2845 	gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
2846 
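	/* Expose the sync manager OBJS and GLBL blocks of DCOREs 1-3 to the user */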
2847 	for (i = 1; i < NUM_OF_DCORES; ++i) {
2848 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
2849 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
2850 
2851 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
2852 						mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
2853 
2854 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
2855 						mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
2856 	}
2857 }
2858 
2859 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
2860 {
2861 	dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
2862 	void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
2863 	int i, j, rc = 0;
2864 
2865 	/* The device ARC works with 32-bit addresses, and because there is a single HW register
2866 	 * that holds the extension bits (49..28), these bits must be identical across the entire
2867 	 * allocated range.
2868 	 */
2869 
2870 	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
2871 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
2872 							&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
2873 		if (!virt_addr_arr[i]) {
2874 			rc = -ENOMEM;
2875 			goto free_dma_mem_arr;
2876 		}
2877 
2878 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
2879 		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
2880 			break;
2881 	}
2882 
2883 	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
2884 		dev_err(hdev->dev,
2885 			"MSB of ARC accessible DMA memory is not identical across the entire range\n");
2886 		rc = -EFAULT;
2887 		goto free_dma_mem_arr;
2888 	}
2889 
2890 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
2891 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
2892 
2893 free_dma_mem_arr:
2894 	for (j = 0 ; j < i ; j++)
2895 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
2896 						dma_addr_arr[j]);
2897 
2898 	return rc;
2899 }
2900 
2901 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
2902 {
2903 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2904 	struct pci_mem_region *region;
2905 
2906 	/* CFG */
2907 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
2908 	region->region_base = CFG_BASE;
2909 	region->region_size = CFG_SIZE;
2910 	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
2911 	region->bar_size = CFG_BAR_SIZE;
2912 	region->bar_id = SRAM_CFG_BAR_ID;
2913 	region->used = 1;
2914 
2915 	/* SRAM */
2916 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
2917 	region->region_base = SRAM_BASE_ADDR;
2918 	region->region_size = SRAM_SIZE;
2919 	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
2920 	region->bar_size = CFG_BAR_SIZE;
2921 	region->bar_id = SRAM_CFG_BAR_ID;
2922 	region->used = 1;
2923 
2924 	/* DRAM */
2925 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
2926 	region->region_base = DRAM_PHYS_BASE;
2927 	region->region_size = hdev->asic_prop.dram_size;
2928 	region->offset_in_bar = 0;
2929 	region->bar_size = prop->dram_pci_bar_size;
2930 	region->bar_id = DRAM_BAR_ID;
2931 	region->used = 1;
2932 }
2933 
2934 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
2935 {
2936 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2937 	int i, j, k;
2938 
2939 	/* Initialize common user CQ interrupt */
2940 	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
2941 				HL_COMMON_USER_CQ_INTERRUPT_ID, false);
2942 
2943 	/* Initialize common decoder interrupt */
2944 	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
2945 				HL_COMMON_DEC_INTERRUPT_ID, true);
2946 
2947 	/* User interrupts structure holds both decoder and user interrupts from various engines.
2948 	 * We first initialize the decoder interrupts and then we add the user interrupts.
2949 	 * The only limitation is that the last decoder interrupt id must be smaller
2950 	 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
2951 	 */
2952 
2953 	/* Initialize decoder interrupts and expose only the normal interrupts;
2954 	 * error interrupts are handled by the driver
2955 	 */
2956 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
2957 										i += 2, j++)
2958 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, true);
2959 
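	/* User interrupt entries follow the decoder entries in the same array, hence 'j' continues */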
2960 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
2961 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, false);
2962 }
2963 
2964 static inline int gaudi2_get_non_zero_random_int(void)
2965 {
2966 	int rand = get_random_u32();
2967 
2968 	return rand ? rand : 1;
2969 }
2970 
2971 static int gaudi2_sw_init(struct hl_device *hdev)
2972 {
2973 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2974 	struct gaudi2_device *gaudi2;
2975 	int i, rc;
2976 
2977 	/* Allocate device structure */
2978 	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
2979 	if (!gaudi2)
2980 		return -ENOMEM;
2981 
2982 	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
2983 		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
2984 			continue;
2985 
2986 		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
2987 			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
2988 				GAUDI2_EVENT_SIZE);
2989 			rc = -EINVAL;
2990 			goto free_gaudi2_device;
2991 		}
2992 
2993 		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
2994 	}
2995 
2996 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
2997 		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
2998 
2999 	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3000 
3001 	hdev->asic_specific = gaudi2;
3002 
3003 	/* Create DMA pool for small allocations.
3004 	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3005 	 * PI/CI registers allocated from this pool have this restriction
3006 	 */
3007 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3008 					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3009 	if (!hdev->dma_pool) {
3010 		dev_err(hdev->dev, "failed to create DMA pool\n");
3011 		rc = -ENOMEM;
3012 		goto free_gaudi2_device;
3013 	}
3014 
3015 	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3016 	if (rc)
3017 		goto free_dma_pool;
3018 
3019 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
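	/* Minimum allocation granularity in this pool is 32 bytes (ilog2(32) order) */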
3020 	if (!hdev->cpu_accessible_dma_pool) {
3021 		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3022 		rc = -ENOMEM;
3023 		goto free_cpu_dma_mem;
3024 	}
3025 
3026 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3027 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3028 	if (rc) {
3029 		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3030 		rc = -EFAULT;
3031 		goto free_cpu_accessible_dma_pool;
3032 	}
3033 
3034 	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3035 								&gaudi2->virt_msix_db_dma_addr);
3036 	if (!gaudi2->virt_msix_db_cpu_addr) {
3037 		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3038 		rc = -ENOMEM;
3039 		goto free_cpu_accessible_dma_pool;
3040 	}
3041 
3042 	spin_lock_init(&gaudi2->hw_queues_lock);
3043 
3044 	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3045 							&gaudi2->scratchpad_bus_address,
3046 							GFP_KERNEL | __GFP_ZERO);
3047 	if (!gaudi2->scratchpad_kernel_address) {
3048 		rc = -ENOMEM;
3049 		goto free_virt_msix_db_mem;
3050 	}
3051 
3052 	gaudi2_user_mapped_blocks_init(hdev);
3053 
3054 	/* Initialize user interrupts */
3055 	gaudi2_user_interrupt_setup(hdev);
3056 
3057 	hdev->supports_coresight = true;
3058 	hdev->supports_sync_stream = true;
3059 	hdev->supports_cb_mapping = true;
3060 	hdev->supports_wait_for_multi_cs = false;
3061 
3062 	prop->supports_compute_reset = true;
3063 
3064 	hdev->asic_funcs->set_pci_memory_regions(hdev);
3065 
3066 	return 0;
3067 
3068 free_virt_msix_db_mem:
3069 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3070 free_cpu_accessible_dma_pool:
3071 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3072 free_cpu_dma_mem:
3073 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3074 					hdev->cpu_accessible_dma_address);
3075 free_dma_pool:
3076 	dma_pool_destroy(hdev->dma_pool);
3077 free_gaudi2_device:
3078 	kfree(gaudi2);
3079 	return rc;
3080 }
3081 
3082 static int gaudi2_sw_fini(struct hl_device *hdev)
3083 {
3084 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3085 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3086 
3087 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3088 
3089 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3090 
3091 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3092 						hdev->cpu_accessible_dma_address);
3093 
3094 	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3095 					gaudi2->scratchpad_bus_address);
3096 
3097 	dma_pool_destroy(hdev->dma_pool);
3098 
3099 	kfree(gaudi2);
3100 
3101 	return 0;
3102 }
3103 
3104 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3105 {
3106 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3107 						QM_GLBL_CFG1_CQF_STOP |
3108 						QM_GLBL_CFG1_CP_STOP);
3109 
3110 	/* Also stop the ARC */
3111 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3112 }
3113 
3114 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3115 {
3116 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3117 						QM_GLBL_CFG1_CQF_FLUSH |
3118 						QM_GLBL_CFG1_CP_FLUSH);
3119 }
3120 
3121 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3122 {
3123 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3124 }
3125 
3126 /**
3127  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3128  *
3129  * @hdev: pointer to the habanalabs device structure
3130  * @queue_id: queue whose fence counters should be cleared
3131  * @skip_fence: if true, set the maximum fence value in all fence counters to avoid
3132  *              getting stuck on any fence value. Otherwise set all fence
3133  *              counters to 0 (standard clear of fence counters)
3134  */
3135 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3136 						bool skip_fence)
3137 {
3138 	u32 size, reg_base;
3139 	u32 addr, val;
3140 
3141 	reg_base = gaudi2_qm_blocks_bases[queue_id];
3142 
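	/* Memset covers the fence counter registers, from FENCE0_CNT_0 up to BARRIER_CFG */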
3143 	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3144 	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3145 
3146 	/*
3147 	 * In case we want to make sure that a QM that is stuck on a fence will
3148 	 * be released, we should set the fence counter to a value higher than
3149 	 * the one the QM is waiting for. To comply with a fence counter of
3150 	 * any value, we set the maximum fence value in all counters
3151 	 */
3152 	val = skip_fence ? U32_MAX : 0;
3153 	gaudi2_memset_device_lbw(hdev, addr, size, val);
3154 }
3155 
3156 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3157 {
3158 	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3159 
3160 	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3161 	gaudi2_flush_qman_common(hdev, reg_base);
3162 	gaudi2_flush_qman_arc_common(hdev, reg_base);
3163 }
3164 
3165 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3166 {
3167 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3168 	int dcore, inst;
3169 
3170 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3171 		goto stop_edma_qmans;
3172 
3173 	/* Stop CPs of PDMA QMANs */
3174 	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3175 	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3176 
3177 stop_edma_qmans:
3178 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3179 		return;
3180 
3181 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3182 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3183 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3184 			u32 qm_base;
3185 
3186 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3187 				continue;
3188 
3189 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3190 					inst * DCORE_EDMA_OFFSET;
3191 
3192 			/* Stop CPs of EDMA QMANs */
3193 			gaudi2_stop_qman_common(hdev, qm_base);
3194 		}
3195 	}
3196 }
3197 
3198 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3199 {
3200 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3201 	u32 offset, i;
3202 
3203 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3204 
3205 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3206 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3207 			continue;
3208 
3209 		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3210 	}
3211 }
3212 
3213 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3214 {
3215 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3216 	u32 reg_base;
3217 	int i;
3218 
3219 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3220 		return;
3221 
3222 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3223 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3224 			continue;
3225 
3226 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3227 		gaudi2_stop_qman_common(hdev, reg_base);
3228 	}
3229 }
3230 
3231 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3232 {
3233 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3234 	u32 reg_base;
3235 	int i;
3236 
3237 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3238 		return;
3239 
3240 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3241 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3242 			continue;
3243 
3244 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3245 		gaudi2_stop_qman_common(hdev, reg_base);
3246 	}
3247 }
3248 
3249 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3250 {
3251 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3252 	u32 reg_base, queue_id;
3253 	int i;
3254 
3255 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3256 		return;
3257 
3258 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3259 
3260 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3261 		if (!(hdev->nic_ports_mask & BIT(i)))
3262 			continue;
3263 
3264 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3265 		gaudi2_stop_qman_common(hdev, reg_base);
3266 	}
3267 }
3268 
3269 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3270 {
3271 	u32 reg_val;
3272 
3273 	reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3274 	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3275 }
3276 
3277 static void gaudi2_dma_stall(struct hl_device *hdev)
3278 {
3279 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3280 	int dcore, inst;
3281 
3282 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3283 		goto stall_edma;
3284 
3285 	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3286 	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3287 
3288 stall_edma:
3289 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3290 		return;
3291 
3292 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3293 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3294 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3295 			u32 core_base;
3296 
3297 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3298 				continue;
3299 
3300 			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3301 					inst * DCORE_EDMA_OFFSET;
3302 
3303 			/* Stall EDMA cores */
3304 			gaudi2_stall_dma_common(hdev, core_base);
3305 		}
3306 	}
3307 }
3308 
3309 static void gaudi2_mme_stall(struct hl_device *hdev)
3310 {
3311 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3312 	u32 offset, i;
3313 
3314 	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3315 
3316 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3317 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3318 			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3319 }
3320 
3321 static void gaudi2_tpc_stall(struct hl_device *hdev)
3322 {
3323 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3324 	u32 reg_base;
3325 	int i;
3326 
3327 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3328 		return;
3329 
3330 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3331 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3332 			continue;
3333 
3334 		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3335 		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3336 	}
3337 }
3338 
3339 static void gaudi2_rotator_stall(struct hl_device *hdev)
3340 {
3341 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3342 	u32 reg_val;
3343 	int i;
3344 
3345 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3346 		return;
3347 
3348 	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3349 			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3350 			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3351 
3352 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3353 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3354 			continue;
3355 
3356 		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3357 	}
3358 }
3359 
3360 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3361 {
3362 	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3363 }
3364 
3365 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3366 {
3367 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3368 	int dcore, inst;
3369 
3370 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3371 		goto stop_edma_qmans;
3372 
3373 	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3374 	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3375 
3376 stop_edma_qmans:
3377 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3378 		return;
3379 
3380 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3381 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3382 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3383 			u32 qm_base;
3384 
3385 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3386 				continue;
3387 
3388 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3389 					inst * DCORE_EDMA_OFFSET;
3390 
3391 			/* Disable CPs of EDMA QMANs */
3392 			gaudi2_disable_qman_common(hdev, qm_base);
3393 		}
3394 	}
3395 }
3396 
3397 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3398 {
3399 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3400 	u32 offset, i;
3401 
3402 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3403 
3404 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3405 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3406 			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3407 }
3408 
3409 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3410 {
3411 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3412 	u32 reg_base;
3413 	int i;
3414 
3415 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3416 		return;
3417 
3418 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3419 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3420 			continue;
3421 
3422 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3423 		gaudi2_disable_qman_common(hdev, reg_base);
3424 	}
3425 }
3426 
3427 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
3428 {
3429 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3430 	u32 reg_base;
3431 	int i;
3432 
3433 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3434 		return;
3435 
3436 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3437 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3438 			continue;
3439 
3440 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3441 		gaudi2_disable_qman_common(hdev, reg_base);
3442 	}
3443 }
3444 
3445 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
3446 {
3447 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3448 	u32 reg_base, queue_id;
3449 	int i;
3450 
3451 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3452 		return;
3453 
3454 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3455 
3456 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3457 		if (!(hdev->nic_ports_mask & BIT(i)))
3458 			continue;
3459 
3460 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3461 		gaudi2_disable_qman_common(hdev, reg_base);
3462 	}
3463 }
3464 
3465 static void gaudi2_enable_timestamp(struct hl_device *hdev)
3466 {
3467 	/* Disable the timestamp counter */
3468 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3469 
3470 	/* Zero the lower/upper parts of the 64-bit counter */
3471 	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
3472 	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
3473 
3474 	/* Enable the counter */
3475 	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
3476 }
3477 
3478 static void gaudi2_disable_timestamp(struct hl_device *hdev)
3479 {
3480 	/* Disable the timestamp counter */
3481 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3482 }
3483 
3484 static const char *gaudi2_irq_name(u16 irq_number)
3485 {
3486 	switch (irq_number) {
3487 	case GAUDI2_IRQ_NUM_EVENT_QUEUE:
3488 		return "gaudi2 cpu eq";
3489 	case GAUDI2_IRQ_NUM_COMPLETION:
3490 		return "gaudi2 completion";
3491 	case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
3492 		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
3493 	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
3494 		return "gaudi2 user completion";
3495 	default:
3496 		return "invalid";
3497 	}
3498 }
3499 
3500 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
3501 {
3502 	int i, irq, relative_idx;
3503 	struct hl_dec *dec;
3504 
3505 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
3506 		irq = pci_irq_vector(hdev->pdev, i);
3507 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3508 
3509 		dec = hdev->dec + relative_idx / 2;
3510 
3511 		/* We pass different structures depending on the irq handler. For the abnormal
3512 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3513 		 * user_interrupt entry
3514 		 */
3515 		free_irq(irq, ((relative_idx % 2) ?
3516 				(void *) dec :
3517 				(void *) &hdev->user_interrupt[dec->core_id]));
3518 	}
3519 }
3520 
3521 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
3522 {
3523 	int rc, i, irq_init_cnt, irq, relative_idx;
3524 	irq_handler_t irq_handler;
3525 	struct hl_dec *dec;
3526 
3527 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
3528 			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
3529 			i++, irq_init_cnt++) {
3530 
3531 		irq = pci_irq_vector(hdev->pdev, i);
3532 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3533 
3534 		irq_handler = (relative_idx % 2) ?
3535 				hl_irq_handler_dec_abnrm :
3536 				hl_irq_handler_user_interrupt;
3537 
3538 		dec = hdev->dec + relative_idx / 2;
3539 
3540 		/* We pass different structures depending on the irq handler. For the abnormal
3541 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3542 		 * user_interrupt entry
3543 		 */
3544 		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i),
3545 				((relative_idx % 2) ?
3546 				(void *) dec :
3547 				(void *) &hdev->user_interrupt[dec->core_id]));
3548 		if (rc) {
3549 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3550 			goto free_dec_irqs;
3551 		}
3552 	}
3553 
3554 	return 0;
3555 
3556 free_dec_irqs:
3557 	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
3558 	return rc;
3559 }
3560 
3561 static int gaudi2_enable_msix(struct hl_device *hdev)
3562 {
3563 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3564 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3565 	int rc, irq, i, j, user_irq_init_cnt;
3566 	irq_handler_t irq_handler;
3567 	struct hl_cq *cq;
3568 
3569 	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
3570 		return 0;
3571 
3572 	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
3573 					PCI_IRQ_MSIX);
3574 	if (rc < 0) {
3575 		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
3576 			GAUDI2_MSIX_ENTRIES, rc);
3577 		return rc;
3578 	}
3579 
3580 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3581 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3582 	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
3583 	if (rc) {
3584 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3585 		goto free_irq_vectors;
3586 	}
3587 
3588 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3589 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
3590 			&hdev->event_queue);
3591 	if (rc) {
3592 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3593 		goto free_completion_irq;
3594 	}
3595 
3596 	rc = gaudi2_dec_enable_msix(hdev);
3597 	if (rc) {
3598 		dev_err(hdev->dev, "Failed to enable decoder IRQ");
3599 		goto free_event_irq;
3600 	}
3601 
3602 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
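	/* User interrupt entries start after the decoder entries, hence 'j' starts at user_dec_intr_count */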
3603 			user_irq_init_cnt < prop->user_interrupt_count;
3604 			i++, j++, user_irq_init_cnt++) {
3605 
3606 		irq = pci_irq_vector(hdev->pdev, i);
3607 		irq_handler = hl_irq_handler_user_interrupt;
3608 
3609 		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]);
3610 		if (rc) {
3611 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3612 			goto free_user_irq;
3613 		}
3614 	}
3615 
3616 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
3617 
3618 	return 0;
3619 
3620 free_user_irq:
3621 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
3622 			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
3623 
3624 		irq = pci_irq_vector(hdev->pdev, i);
3625 		free_irq(irq, &hdev->user_interrupt[j]);
3626 	}
3627 
3628 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3629 
3630 free_event_irq:
3631 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3632 	free_irq(irq, &hdev->event_queue);
3633 
3634 free_completion_irq:
3635 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3636 	free_irq(irq, cq);
3637 
3638 free_irq_vectors:
3639 	pci_free_irq_vectors(hdev->pdev);
3640 
3641 	return rc;
3642 }
3643 
3644 static void gaudi2_sync_irqs(struct hl_device *hdev)
3645 {
3646 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3647 	int i, j;
3648 	int irq;
3649 
3650 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3651 		return;
3652 
3653 	/* Wait for all pending IRQs to be finished */
3654 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
3655 
3656 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
3657 		irq = pci_irq_vector(hdev->pdev, i);
3658 		synchronize_irq(irq);
3659 	}
3660 
3661 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
3662 										i++, j++) {
3663 		irq = pci_irq_vector(hdev->pdev, i);
3664 		synchronize_irq(irq);
3665 	}
3666 
3667 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
3668 }
3669 
3670 static void gaudi2_disable_msix(struct hl_device *hdev)
3671 {
3672 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3673 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3674 	struct hl_cq *cq;
3675 	int irq, i, j, k;
3676 
3677 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3678 		return;
3679 
3680 	gaudi2_sync_irqs(hdev);
3681 
3682 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3683 	free_irq(irq, &hdev->event_queue);
3684 
3685 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3686 
3687 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
3688 			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
3689 
3690 		irq = pci_irq_vector(hdev->pdev, i);
3691 		free_irq(irq, &hdev->user_interrupt[j]);
3692 	}
3693 
3694 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3695 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3696 	free_irq(irq, cq);
3697 
3698 	pci_free_irq_vectors(hdev->pdev);
3699 
3700 	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
3701 }
3702 
3703 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
3704 {
3705 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3706 	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3707 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3708 	int rc;
3709 
3710 	if (hdev->pldm)
3711 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3712 	else
3713 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3714 
3715 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3716 		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
3717 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3718 			continue;
3719 
3720 		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
3721 
3722 		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
3723 
3724 		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3725 
3726 		/* Wait until all traffic from the decoder stops
3727 		 * before applying core reset.
3728 		 */
3729 		rc = hl_poll_timeout(
3730 				hdev,
3731 				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3732 				graceful,
3733 				(graceful & graceful_pend_mask),
3734 				100,
3735 				timeout_usec);
3736 		if (rc)
3737 			dev_err(hdev->dev,
3738 				"Failed to stop traffic from DCORE%d Decoder %d\n",
3739 				dcore_id, dec_id);
3740 	}
3741 }
3742 
3743 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
3744 {
3745 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3746 	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3747 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3748 	int rc;
3749 
3750 	if (hdev->pldm)
3751 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3752 	else
3753 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3754 
3755 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3756 		dec_bit = PCIE_DEC_SHIFT + dec_id;
3757 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3758 			continue;
3759 
3760 		offset = dec_id * PCIE_VDEC_OFFSET;
3761 
3762 		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
3763 
3764 		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3765 
3766 		/* Wait until all traffic from the decoder stops
3767 		 * before applying core reset.
3768 		 */
3769 		rc = hl_poll_timeout(
3770 				hdev,
3771 				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3772 				graceful,
3773 				(graceful & graceful_pend_mask),
3774 				100,
3775 				timeout_usec);
3776 		if (rc)
3777 			dev_err(hdev->dev,
3778 				"Failed to stop traffic from PCIe Decoder %d\n",
3779 				dec_id);
3780 	}
3781 }
3782 
3783 static void gaudi2_stop_dec(struct hl_device *hdev)
3784 {
3785 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3786 	int dcore_id;
3787 
3788 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
3789 		return;
3790 
3791 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
3792 		gaudi2_stop_dcore_dec(hdev, dcore_id);
3793 
3794 	gaudi2_stop_pcie_dec(hdev);
3795 }
3796 
3797 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3798 {
3799 	u32 reg_base, reg_val;
3800 
3801 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
3802 	if (run_mode == HL_ENGINE_CORE_RUN)
3803 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
3804 	else
3805 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
3806 
3807 	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
3808 }
3809 
3810 static void gaudi2_halt_arcs(struct hl_device *hdev)
3811 {
3812 	u16 arc_id;
3813 
3814 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
3815 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3816 			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
3817 	}
3818 }
3819 
3820 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3821 {
3822 	int rc;
3823 	u32 reg_base, val, ack_mask, timeout_usec = 100000;
3824 
3825 	if (hdev->pldm)
3826 		timeout_usec *= 100;
3827 
3828 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
3829 	if (run_mode == HL_ENGINE_CORE_RUN)
3830 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
3831 	else
3832 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
3833 
3834 	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
3835 				val, ((val & ack_mask) == ack_mask),
3836 				1000, timeout_usec);
3837 
3838 	if (!rc) {
3839 		/* Clear */
3840 		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
3841 		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
3842 	}
3843 
3844 	return rc;
3845 }
3846 
3847 static void gaudi2_reset_arcs(struct hl_device *hdev)
3848 {
3849 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3850 	u16 arc_id;
3851 
3852 	if (!gaudi2)
3853 		return;
3854 
3855 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
3856 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3857 			gaudi2_clr_arc_id_cap(hdev, arc_id);
3858 }
3859 
3860 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
3861 {
3862 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3863 	u32 queue_id;
3864 	int i;
3865 
3866 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3867 		return;
3868 
3869 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3870 
3871 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3872 		if (!(hdev->nic_ports_mask & BIT(i)))
3873 			continue;
3874 
3875 		gaudi2_qman_manual_flush_common(hdev, queue_id);
3876 	}
3877 }
3878 
3879 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
3880 					u32 num_cores, u32 core_command)
3881 {
3882 	int i, rc;
3883 
3884 
3885 	for (i = 0 ; i < num_cores ; i++) {
3886 		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
3887 			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
3888 	}
3889 
3890 	for (i = 0 ; i < num_cores ; i++) {
3891 		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
3892 			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
3893 
3894 			if (rc) {
3895 				dev_err(hdev->dev, "failed to %s arc: %d\n",
3896 					(core_command == HL_ENGINE_CORE_HALT) ?
3897 					"HALT" : "RUN", core_ids[i]);
3898 				return -1;
3899 			}
3900 		}
3901 	}
3902 
3903 	return 0;
3904 }
3905 
3906 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3907 {
3908 	u32 wait_timeout_ms;
3909 
3910 	if (hdev->pldm)
3911 		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
3912 	else
3913 		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
3914 
3915 	if (fw_reset)
3916 		goto skip_engines;
3917 
3918 	gaudi2_stop_dma_qmans(hdev);
3919 	gaudi2_stop_mme_qmans(hdev);
3920 	gaudi2_stop_tpc_qmans(hdev);
3921 	gaudi2_stop_rot_qmans(hdev);
3922 	gaudi2_stop_nic_qmans(hdev);
3923 	msleep(wait_timeout_ms);
3924 
3925 	gaudi2_halt_arcs(hdev);
3926 	gaudi2_dma_stall(hdev);
3927 	gaudi2_mme_stall(hdev);
3928 	gaudi2_tpc_stall(hdev);
3929 	gaudi2_rotator_stall(hdev);
3930 
3931 	msleep(wait_timeout_ms);
3932 
3933 	gaudi2_stop_dec(hdev);
3934 
3935 	/*
3936 	 * In case of soft reset, do a manual flush for QMANs (currently done
3937 	 * only for NIC QMANs)
3938 	 */
3939 	if (!hard_reset)
3940 		gaudi2_nic_qmans_manual_flush(hdev);
3941 
3942 	gaudi2_disable_dma_qmans(hdev);
3943 	gaudi2_disable_mme_qmans(hdev);
3944 	gaudi2_disable_tpc_qmans(hdev);
3945 	gaudi2_disable_rot_qmans(hdev);
3946 	gaudi2_disable_nic_qmans(hdev);
3947 	gaudi2_disable_timestamp(hdev);
3948 
3949 skip_engines:
3950 	if (hard_reset) {
3951 		gaudi2_disable_msix(hdev);
3952 		return;
3953 	}
3954 
3955 	gaudi2_sync_irqs(hdev);
3956 }
3957 
3958 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
3959 {
3960 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3961 
3962 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3963 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3964 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3965 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3966 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3967 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
3968 }
3969 
3970 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
3971 {
3972 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3973 	struct dynamic_fw_load_mgr *dynamic_loader;
3974 	struct cpu_dyn_regs *dyn_regs;
3975 
3976 	/* fill common fields */
3977 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3978 	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
3979 	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
3980 	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
3981 	fw_loader->skip_bmc = false;
3982 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
3983 	fw_loader->dram_bar_id = DRAM_BAR_ID;
3984 	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
3985 
3986 	/* Here we update initial values for a few specific dynamic regs (before
3987 	 * reading the first descriptor from the FW, those values have to be
3988 	 * hard-coded). In later stages of the protocol those values will be
3989 	 * updated automatically by reading the FW descriptor, so the data there
3990 	 * will always be up-to-date
3991 	 */
3992 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3993 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3994 	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3995 	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3996 	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
3997 }
3998 
3999 static int gaudi2_init_cpu(struct hl_device *hdev)
4000 {
4001 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4002 	int rc;
4003 
4004 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4005 		return 0;
4006 
4007 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4008 		return 0;
4009 
4010 	rc = hl_fw_init_cpu(hdev);
4011 	if (rc)
4012 		return rc;
4013 
4014 	gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4015 
4016 	return 0;
4017 }
4018 
4019 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4020 {
4021 	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4022 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4023 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4024 	struct cpu_dyn_regs *dyn_regs;
4025 	struct hl_eq *eq;
4026 	u32 status;
4027 	int err;
4028 
4029 	if (!hdev->cpu_queues_enable)
4030 		return 0;
4031 
4032 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4033 		return 0;
4034 
4035 	eq = &hdev->event_queue;
4036 
4037 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4038 
4039 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4040 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4041 
4042 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4043 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4044 
4045 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4046 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4047 
4048 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4049 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4050 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4051 
4052 	/* Used for EQ CI */
4053 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4054 
4055 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4056 
4057 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4058 
4059 	/* Let the ARC know we are ready as it is now handling those queues */
4060 
4061 	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4062 		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4063 
4064 	err = hl_poll_timeout(
4065 		hdev,
4066 		mmCPU_IF_QUEUE_INIT,
4067 		status,
4068 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4069 		1000,
4070 		cpu_timeout);
4071 
4072 	if (err) {
4073 		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4074 		return -EIO;
4075 	}
4076 
4077 	/* update FW application security bits */
4078 	if (prop->fw_cpu_boot_dev_sts0_valid)
4079 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4080 
4081 	if (prop->fw_cpu_boot_dev_sts1_valid)
4082 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4083 
4084 	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4085 	return 0;
4086 }
4087 
4088 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4089 				u32 queue_id_base)
4090 {
4091 	struct hl_hw_queue *q;
4092 	u32 pq_id, pq_offset;
4093 
4094 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4095 		q = &hdev->kernel_queues[queue_id_base + pq_id];
4096 		pq_offset = pq_id * 4;
4097 
4098 		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4099 				lower_32_bits(q->bus_address));
4100 		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4101 				upper_32_bits(q->bus_address));
4102 		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4103 		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4104 		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4105 	}
4106 }
4107 
4108 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4109 {
4110 	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4111 
4112 	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4113 	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4114 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4115 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4116 
4117 	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4118 		cp_offset = cp_id * 4;
4119 
4120 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4121 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
4122 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
4123 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
4124 	}
4125 
4126 	/* allow QMANs to accept work from ARC CQF */
4127 	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4128 }
4129 
4130 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4131 				u32 queue_id_base)
4132 {
4133 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4134 	u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4135 
4136 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4137 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4138 
4139 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4140 		pq_offset = pq_id * 4;
4141 
4142 		/* Configure QMAN HBW to scratchpad as it is not needed */
4143 		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4144 				lower_32_bits(gaudi2->scratchpad_bus_address));
4145 		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4146 				upper_32_bits(gaudi2->scratchpad_bus_address));
4147 		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4148 				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4149 
4150 		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4151 		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4152 		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4153 		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4154 	}
4155 
4156 	/* Enable QMAN H/W completion */
4157 	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4158 }
4159 
4160 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4161 {
4162 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4163 	u32 sp_reg_addr;
4164 
4165 	switch (queue_id_base) {
4166 	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4167 		fallthrough;
4168 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4169 		fallthrough;
4170 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4171 		fallthrough;
4172 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4173 		fallthrough;
4174 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4175 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4176 		break;
4177 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4178 		fallthrough;
4179 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4180 		fallthrough;
4181 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4182 		fallthrough;
4183 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4184 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4185 		break;
4186 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4187 		fallthrough;
4188 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4189 		fallthrough;
4190 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4191 		fallthrough;
4192 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4193 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4194 		break;
4195 	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4196 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4197 		break;
4198 	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4199 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4200 		break;
4201 	default:
4202 		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4203 		return 0;
4204 	}
4205 
4206 	return sp_reg_addr;
4207 }
4208 
4209 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4210 					u32 queue_id_base)
4211 {
4212 	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4213 	int map_table_entry;
4214 
4215 	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4216 
4217 	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4218 	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4219 	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4220 
4221 	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4222 	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4223 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4224 
4225 	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4226 
4227 	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4228 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4229 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4230 
4231 	/* Enable the QMAN channel.
4232 	 * PDMA QMAN configuration is different, as we do not allow the user to
4233 	 * access some of the CPs.
4234 	 * PDMA0: CP2/3 are reserved for ARC usage.
4235 	 * PDMA1: CP1/2/3 are reserved for ARC usage.
4236 	 */
4237 	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4238 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4239 	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4240 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4241 	else
4242 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4243 }
4244 
4245 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4246 		u32 queue_id_base)
4247 {
4248 	u32 pq_id;
4249 
4250 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
4251 		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
4252 
4253 	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
4254 	gaudi2_init_qman_cp(hdev, reg_base);
4255 	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
4256 	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
4257 }
4258 
4259 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
4260 				u32 dma_core_id, bool is_secure)
4261 {
4262 	u32 prot, irq_handler_offset;
4263 	struct cpu_dyn_regs *dyn_regs;
4264 	int map_table_entry;
4265 
4266 	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
4267 	if (is_secure)
4268 		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
4269 
4270 	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
4271 
4272 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4273 	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
4274 
4275 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
4276 			lower_32_bits(CFG_BASE + irq_handler_offset));
4277 
4278 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
4279 			upper_32_bits(CFG_BASE + irq_handler_offset));
4280 
4281 	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
4282 	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
4283 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4284 
4285 	/* Enable the DMA channel */
4286 	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
4287 }
4288 
4289 static void gaudi2_init_kdma(struct hl_device *hdev)
4290 {
4291 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4292 	u32 reg_base;
4293 
4294 	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
4295 		return;
4296 
4297 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
4298 
4299 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
4300 
4301 	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
4302 }
4303 
4304 static void gaudi2_init_pdma(struct hl_device *hdev)
4305 {
4306 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4307 	u32 reg_base;
4308 
4309 	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
4310 		return;
4311 
4312 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
4313 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
4314 
4315 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
4316 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
4317 
4318 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
4319 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
4320 
4321 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
4322 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
4323 
4324 	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
4325 }
4326 
4327 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
4328 {
4329 	u32 reg_base, base_edma_core_id, base_edma_qman_id;
4330 
4331 	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
4332 	base_edma_qman_id = edma_stream_base[seq];
4333 
4334 	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
4335 	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
4336 
4337 	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
4338 	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
4339 }
4340 
4341 static void gaudi2_init_edma(struct hl_device *hdev)
4342 {
4343 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4344 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4345 	int dcore, inst;
4346 
4347 	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
4348 		return;
4349 
4350 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4351 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4352 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4353 
4354 			if (!(prop->edma_enabled_mask & BIT(seq)))
4355 				continue;
4356 
4357 			gaudi2_init_edma_instance(hdev, seq);
4358 
4359 			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
4360 		}
4361 	}
4362 }
4363 
4364 /*
4365  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
4366  * @hdev: pointer to habanalabs device structure.
4367  * @sob_id: sync object ID.
4368  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
4369  * @interrupt_id: interrupt ID.
4370  *
4371  * Some initiators cannot have an HBW address in their completion address registers, and thus cannot
4372  * write directly to the HBW host memory of the virtual MSI-X doorbell.
4373  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
4374  *
4375  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
4376  * In addition to the HBW write, the other 2 messages are for preparing the monitor for the next
4377  * completion, by decrementing the sync object value and re-arming the monitor.
4378  */
4379 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
4380 							u32 first_mon_id, u32 interrupt_id)
4381 {
4382 	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
4383 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4384 	u64 addr;
4385 	u8 mask;
4386 
4387 	/* Reset the SOB value */
4388 	sob_offset = sob_id * sizeof(u32);
4389 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
4390 
4391 	/* Configure 3 monitors:
4392 	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
4393 	 * 2. Decrement SOB value by 1.
4394 	 * 3. Re-arm the master monitor.
4395 	 */
4396 
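	/* Each per-monitor register (PAY_ADDRL/H, PAY_DATA, CONFIG, ARM) is an array
	 * of 32-bit entries, so a monitor's offset into every array is its ID times
	 * sizeof(u32).
	 */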
4397 	first_mon_offset = first_mon_id * sizeof(u32);
4398 
4399 	/* 2nd monitor: Decrement SOB value by 1 */
4400 	mon_offset = first_mon_offset + sizeof(u32);
4401 
4402 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
4403 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4404 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4405 
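	/* Writing with the INC bit set performs an atomic add to the SOB rather than
	 * an overwrite; together with the SIGN bit, the 0x7FFF value appears to act
	 * as -1 (hence the "-1" note), undoing the increment done by the initiator.
	 */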
4406 	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
4407 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
4408 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
4409 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4410 
4411 	/* 3rd monitor: Re-arm the master monitor */
4412 	mon_offset = first_mon_offset + 2 * sizeof(u32);
4413 
4414 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
4415 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4416 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4417 
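	/* A monitor observes a group of 8 SOBs: SID selects the group and the mask
	 * selects SOBs within it (a set mask bit appears to exclude the corresponding
	 * SOB, so only sob_id is observed); SOD is the value the SOB is compared against.
	 */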
4418 	sob_group = sob_id / 8;
4419 	mask = ~BIT(sob_id & 0x7);
4420 	mode = 0; /* comparison mode is "greater than or equal to" */
4421 	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
4422 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
4423 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
4424 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
4425 
4426 	payload = arm;
4427 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4428 
4429 	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
4430 	mon_offset = first_mon_offset;
4431 
4432 	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
4433 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
4434 
4435 	addr = gaudi2->virt_msix_db_dma_addr;
4436 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4437 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4438 
4439 	payload = interrupt_id;
4440 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4441 
4442 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
4443 }
4444 
4445 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
4446 {
4447 	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
4448 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4449 
4450 	/* Decoder normal/abnormal interrupts */
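	/* Each enabled decoder gets two SOB/monitor-triplet pairs: one for its normal
	 * interrupt and one for its abnormal interrupt. The MSI-X interrupt IDs are
	 * interleaved per decoder (NRM, then ABNRM).
	 */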
4451 	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
4452 		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
4453 			continue;
4454 
4455 		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4456 		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
4457 		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
4458 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4459 
4460 		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4461 		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
4462 		interrupt_id += 1;
4463 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4464 	}
4465 }
4466 
4467 static void gaudi2_init_sm(struct hl_device *hdev)
4468 {
4469 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4470 	u64 cq_address;
4471 	u32 reg_val;
4472 	int i;
4473 
4474 	/* Enable HBW/LBW CQ for completion monitors */
4475 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4476 	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
4477 
4478 	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
4479 		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4480 
4481 	/* Enable only HBW CQ for KDMA completion monitor */
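	/* At this point i == GAUDI2_MAX_PENDING_CS, i.e. the monitor right after the
	 * CS completion monitors is the one dedicated to KDMA, and it gets no LBW
	 * completion.
	 */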
4482 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4483 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4484 
4485 	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
4486 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
4487 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
4488 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
4489 
4490 	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
4491 		cq_address =
4492 			hdev->completion_queue[i].bus_address;
4493 
4494 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
4495 							lower_32_bits(cq_address));
4496 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
4497 							upper_32_bits(cq_address));
4498 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
4499 							ilog2(HL_CQ_SIZE_IN_BYTES));
4500 	}
4501 
4502 	/* Configure kernel ASID and MMU BP */
4503 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
4504 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
4505 
4506 	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
4507 	gaudi2_prepare_sm_for_virt_msix_db(hdev);
4508 }
4509 
4510 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
4511 {
4512 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4513 	u32 reg_val;
4514 	int i;
4515 
4516 	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
4517 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
4518 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
4519 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
4520 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
4521 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
4522 
4523 	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
4524 	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
4525 
4526 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
4527 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
4528 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
4529 	}
4530 }
4531 
4532 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
4533 							bool config_qman_only)
4534 {
4535 	u32 queue_id_base, reg_base;
4536 
4537 	switch (dcore_id) {
4538 	case 0:
4539 		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
4540 		break;
4541 	case 1:
4542 		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
4543 		break;
4544 	case 2:
4545 		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
4546 		break;
4547 	case 3:
4548 		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
4549 		break;
4550 	default:
4551 		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
4552 		return;
4553 	}
4554 
4555 	if (!config_qman_only) {
4556 		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
4557 		gaudi2_init_mme_acc(hdev, reg_base);
4558 	}
4559 
4560 	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
4561 	gaudi2_init_qman(hdev, reg_base, queue_id_base);
4562 }
4563 
4564 static void gaudi2_init_mme(struct hl_device *hdev)
4565 {
4566 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4567 	int i;
4568 
4569 	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
4570 		return;
4571 
4572 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
4573 		gaudi2_init_dcore_mme(hdev, i, false);
4574 
4575 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
4576 	}
4577 }
4578 
4579 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
4580 {
4581 	/* Mask arithmetic and QM interrupts in TPC */
4582 	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
4583 
4584 	/* Set 16 cache lines */
4585 	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
4586 			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
4587 }
4588 
4589 struct gaudi2_tpc_init_cfg_data {
4590 	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
4591 };
4592 
4593 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
4594 					u32 offset, struct iterate_module_ctx *ctx)
4595 {
4596 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4597 	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
4598 	u32 queue_id_base;
4599 	u8 seq;
4600 
4601 	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
4602 
4603 	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
4604 		/* gets last sequence number */
4605 		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
4606 	else
4607 		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
4608 
4609 	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
4610 	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
4611 
4612 	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
4613 }
4614 
4615 static void gaudi2_init_tpc(struct hl_device *hdev)
4616 {
4617 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4618 	struct gaudi2_tpc_init_cfg_data init_cfg_data;
4619 	struct iterate_module_ctx tpc_iter;
4620 
4621 	if (!hdev->asic_prop.tpc_enabled_mask)
4622 		return;
4623 
4624 	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
4625 		return;
4626 
4627 	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
4628 	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
4629 	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
4630 	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
4631 	tpc_iter.fn = &gaudi2_init_tpc_config;
4632 	tpc_iter.data = &init_cfg_data;
4633 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
4634 }
4635 
4636 static void gaudi2_init_rotator(struct hl_device *hdev)
4637 {
4638 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4639 	u32 i, reg_base, queue_id;
4640 
4641 	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
4642 
4643 	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4644 		reg_base = gaudi2_qm_blocks_bases[queue_id];
4645 		gaudi2_init_qman(hdev, reg_base, queue_id);
4646 
4647 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
4648 	}
4649 }
4650 
4651 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
4652 {
4653 	u32 sob_id;
4654 
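	/* The decoder bridge cannot write directly to the HBW virtual MSI-X doorbell,
	 * so its interrupt writes are pointed at a reserved SOB; the monitors armed in
	 * gaudi2_prepare_sm_for_virt_msix_db() turn the SOB increment into the actual
	 * doorbell write.
	 */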
4655 	/* VCMD normal interrupt */
4656 	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4657 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
4658 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4659 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4660 
4661 	/* VCMD abnormal interrupt */
4662 	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4663 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
4664 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4665 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4666 }
4667 
4668 static void gaudi2_init_dec(struct hl_device *hdev)
4669 {
4670 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4671 	u32 dcore_id, dec_id, dec_bit;
4672 	u64 base_addr;
4673 
4674 	if (!hdev->asic_prop.decoder_enabled_mask)
4675 		return;
4676 
4677 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
4678 		return;
4679 
4680 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4681 		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4682 			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4683 
4684 			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4685 				continue;
4686 
4687 			base_addr =  mmDCORE0_DEC0_CMD_BASE +
4688 					BRDG_CTRL_BLOCK_OFFSET +
4689 					dcore_id * DCORE_OFFSET +
4690 					dec_id * DCORE_VDEC_OFFSET;
4691 
4692 			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4693 
4694 			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4695 		}
4696 
4697 	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
4698 		dec_bit = PCIE_DEC_SHIFT + dec_id;
4699 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4700 			continue;
4701 
4702 		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
4703 				dec_id * DCORE_VDEC_OFFSET;
4704 
4705 		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4706 
4707 		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4708 	}
4709 }
4710 
4711 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
4712 					u32 stlb_base, u32 asid, u64 phys_addr)
4713 {
4714 	u32 status, timeout_usec;
4715 	int rc;
4716 
4717 	if (hdev->pldm || !hdev->pdev)
4718 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
4719 	else
4720 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4721 
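	/* Program the ASID and its hop0 physical address, then set the busy bit
	 * (bit 31) to latch the update; the poll below assumes HW clears the bit
	 * once the update is done.
	 */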
4722 	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
4723 	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
4724 	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
4725 	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
4726 
4727 	rc = hl_poll_timeout(
4728 		hdev,
4729 		stlb_base + STLB_BUSY_OFFSET,
4730 		status,
4731 		!(status & 0x80000000),
4732 		1000,
4733 		timeout_usec);
4734 
4735 	if (rc) {
4736 		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
4737 		return rc;
4738 	}
4739 
4740 	return 0;
4741 }
4742 
4743 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
4744 					u32 start_offset, u32 inv_start_val,
4745 					u32 flags)
4746 {
4747 	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
4748 	if (flags & MMU_OP_CLEAR_MEMCACHE)
4749 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
4750 
4751 	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
4752 		return;
4753 
4754 	WREG32(stlb_base + start_offset, inv_start_val);
4755 }
4756 
4757 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
4758 						struct gaudi2_cache_invld_params *inv_params)
4759 {
4760 	u32 status, timeout_usec, start_offset;
4761 	int rc;
4762 
4763 	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
4764 					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
4765 
4766 	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
4767 	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
4768 		rc = hl_poll_timeout(
4769 			hdev,
4770 			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
4771 			status,
4772 			status & 0x1,
4773 			1000,
4774 			timeout_usec);
4775 
4776 		if (rc)
4777 			return rc;
4778 
4779 		/* Need to manually reset the status to 0 */
4780 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
4781 	}
4782 
4783 	/* Lower cache does not work with cache lines, hence we can skip its
4784 	 * invalidation upon map and invalidate only upon unmap
4785 	 */
4786 	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
4787 		return 0;
4788 
4789 	start_offset = inv_params->range_invalidation ?
4790 			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
4791 
4792 	rc = hl_poll_timeout(
4793 		hdev,
4794 		stlb_base + start_offset,
4795 		status,
4796 		!(status & 0x1),
4797 		1000,
4798 		timeout_usec);
4799 
4800 	return rc;
4801 }
4802 
4803 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
4804 {
4805 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4806 	u32 hw_cap;
4807 
4808 	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
4809 
4810 	if (gaudi2->hw_cap_initialized & hw_cap)
4811 		return true;
4812 
4813 	return false;
4814 }
4815 
4816 /* this function shall be called only for HMMUs for which capability bit is set */
4817 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
4818 {
4819 	u32 offset;
4820 
4821 	offset =  (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
4822 	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
4823 }
4824 
4825 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
4826 						struct gaudi2_cache_invld_params *inv_params)
4827 {
4828 	u32 start_offset;
4829 
4830 	if (inv_params->range_invalidation) {
4831 		/* Set the address range.
4832 		 * Note: by design, the start address written to the register is not
4833 		 * included in the invalidation range.
4834 		 * That is why we program an address one lower than the first address
4835 		 * we actually want to be included in the range invalidation.
4836 		 */
4837 		u64 start = inv_params->start_va - 1;
4838 
4839 		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
4840 
4841 		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
4842 				start >> MMU_RANGE_INV_VA_LSB_SHIFT);
4843 
4844 		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
4845 				start >> MMU_RANGE_INV_VA_MSB_SHIFT);
4846 
4847 		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
4848 				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
4849 
4850 		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
4851 				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
4852 	} else {
4853 		start_offset = STLB_INV_ALL_START_OFFSET;
4854 	}
4855 
4856 	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
4857 						inv_params->inv_start_val, inv_params->flags);
4858 }
4859 
4860 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
4861 						int dcore_id, int hmmu_id,
4862 						struct gaudi2_cache_invld_params *inv_params)
4863 {
4864 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4865 
4866 	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
4867 }
4868 
4869 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
4870 						int dcore_id, int hmmu_id,
4871 						struct gaudi2_cache_invld_params *inv_params)
4872 {
4873 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4874 
4875 	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
4876 }
4877 
4878 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
4879 						struct gaudi2_cache_invld_params *inv_params)
4880 {
4881 	int dcore_id, hmmu_id;
4882 
4883 	/* first send all invalidation commands */
4884 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4885 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4886 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4887 				continue;
4888 
4889 			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
4890 		}
4891 	}
4892 
4893 	/* next, poll all invalidations status */
4894 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4895 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4896 			int rc;
4897 
4898 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4899 				continue;
4900 
4901 			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
4902 										inv_params);
4903 			if (rc)
4904 				return rc;
4905 		}
4906 	}
4907 
4908 	return 0;
4909 }
4910 
4911 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
4912 {
4913 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4914 	struct gaudi2_cache_invld_params invld_params;
4915 	int rc = 0;
4916 
4917 	if (hdev->reset_info.hard_reset_pending)
4918 		return rc;
4919 
4920 	invld_params.range_invalidation = false;
4921 	invld_params.inv_start_val = 1;
4922 
4923 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
4924 		invld_params.flags = flags;
4925 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
4926 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
4927 										&invld_params);
4928 	} else if (flags & MMU_OP_PHYS_PACK) {
4929 		invld_params.flags = 0;
4930 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
4931 	}
4932 
4933 	return rc;
4934 }
4935 
4936 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
4937 				u32 flags, u32 asid, u64 va, u64 size)
4938 {
4939 	struct gaudi2_cache_invld_params invld_params = {0};
4940 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4941 	u64 start_va, end_va;
4942 	u32 inv_start_val;
4943 	int rc = 0;
4944 
4945 	if (hdev->reset_info.hard_reset_pending)
4946 		return 0;
4947 
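	/* Invalidation trigger value: enable range mode, enable ASID filtering and
	 * encode the target ASID.
	 */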
4948 	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
4949 			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
4950 			asid << MMU_RANGE_INV_ASID_SHIFT);
4951 	start_va = va;
4952 	end_va = start_va + size;
4953 
4954 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
4955 		/* As range invalidation does not support a zero start address, we do
4956 		 * a full invalidation in this case
4957 		 */
4958 		if (start_va) {
4959 			invld_params.range_invalidation = true;
4960 			invld_params.start_va = start_va;
4961 			invld_params.end_va = end_va;
4962 			invld_params.inv_start_val = inv_start_val;
4963 			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
4964 		} else {
4965 			invld_params.range_invalidation = false;
4966 			invld_params.inv_start_val = 1;
4967 			invld_params.flags = flags;
4968 		}
4969 
4971 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
4972 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
4973 										&invld_params);
4974 		if (rc)
4975 			return rc;
4976 
4977 	} else if (flags & MMU_OP_PHYS_PACK) {
4978 		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
4979 		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
4980 		invld_params.inv_start_val = inv_start_val;
4981 		invld_params.flags = flags;
4982 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
4983 	}
4984 
4985 	return rc;
4986 }
4987 
4988 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
4989 {
4990 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4991 	u64 hop0_addr;
4992 	u32 asid, max_asid = prop->max_asid;
4993 	int rc;
4994 
4995 	/* it takes too much time to init all of the ASIDs on palladium */
4996 	if (hdev->pldm)
4997 		max_asid = min((u32) 8, max_asid);
4998 
4999 	for (asid = 0 ; asid < max_asid ; asid++) {
5000 		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5001 		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5002 		if (rc) {
5003 			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5004 			return rc;
5005 		}
5006 	}
5007 
5008 	return 0;
5009 }
5010 
5011 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5012 {
5013 	u32 status, timeout_usec;
5014 	int rc;
5015 
5016 	if (hdev->pldm || !hdev->pdev)
5017 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5018 	else
5019 		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5020 
5021 	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5022 
5023 	rc = hl_poll_timeout(
5024 		hdev,
5025 		stlb_base + STLB_SRAM_INIT_OFFSET,
5026 		status,
5027 		!status,
5028 		1000,
5029 		timeout_usec);
5030 
5031 	if (rc)
5032 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5033 
5034 	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5035 	if (rc)
5036 		return rc;
5037 
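	/* Hop0 tables are now programmed for all ASIDs, so translation no longer
	 * needs to be bypassed.
	 */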
5038 	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5039 
5040 	rc = hl_poll_timeout(
5041 		hdev,
5042 		stlb_base + STLB_INV_ALL_START_OFFSET,
5043 		status,
5044 		!status,
5045 		1000,
5046 		timeout_usec);
5047 
5048 	if (rc)
5049 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5050 
5051 	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5052 
5053 	return rc;
5054 }
5055 
5056 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5057 {
5058 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5059 	u32 mmu_base, stlb_base;
5060 	int rc;
5061 
5062 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5063 		return 0;
5064 
5065 	mmu_base = mmPMMU_HBW_MMU_BASE;
5066 	stlb_base = mmPMMU_HBW_STLB_BASE;
5067 
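	/* PMMU page-walk configuration: walk from hop 0 to hop 5, with the first
	 * lookup at hop 5 for small pages and at hop 4 for large pages.
	 */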
5068 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5069 		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5070 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5071 		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5072 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5073 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5074 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5075 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5076 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5077 		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5078 		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5079 
5080 	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5081 
5082 	if (PAGE_SIZE == SZ_64K) {
5083 		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5084 		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5085 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5086 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5087 			FIELD_PREP(
5088 				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5089 				1),
5090 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5091 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5092 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5093 	}
5094 
5095 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5096 
5097 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5098 	if (rc)
5099 		return rc;
5100 
5101 	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5102 
5103 	return 0;
5104 }
5105 
5106 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5107 				int hmmu_id)
5108 {
5109 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5110 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5111 	u32 offset, mmu_base, stlb_base, hw_cap;
5112 	u8 dmmu_seq;
5113 	int rc;
5114 
5115 	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5116 	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5117 
5118 	/*
5119 	 * return if DMMU is already initialized or if it's not out of
5120 	 * isolation (due to cluster binning)
5121 	 */
5122 	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5123 		return 0;
5124 
5125 	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5126 	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5127 	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5128 
5129 	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5130 			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5131 
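	/* HMMU page-walk configuration: walk from hop 0 to hop 3, with the first
	 * lookup at hop 3 for both small and large pages.
	 */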
5132 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5133 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5134 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5135 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5136 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5137 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5138 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5139 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5140 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5141 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5142 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5143 
5144 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5145 			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5146 
5147 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5148 
5149 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5150 	if (rc)
5151 		return rc;
5152 
5153 	gaudi2->hw_cap_initialized |= hw_cap;
5154 
5155 	return 0;
5156 }
5157 
5158 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5159 {
5160 	int rc, dcore_id, hmmu_id;
5161 
5162 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5163 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5164 			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5165 			if (rc)
5166 				return rc;
5167 		}
5168 
5169 	return 0;
5170 }
5171 
5172 static int gaudi2_mmu_init(struct hl_device *hdev)
5173 {
5174 	int rc;
5175 
5176 	rc = gaudi2_pci_mmu_init(hdev);
5177 	if (rc)
5178 		return rc;
5179 
5180 	rc = gaudi2_hbm_mmu_init(hdev);
5181 	if (rc)
5182 		return rc;
5183 
5184 	return 0;
5185 }
5186 
5187 static int gaudi2_hw_init(struct hl_device *hdev)
5188 {
5189 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5190 	int rc;
5191 
5192 	/* Let's mark in the H/W that we have reached this point. We check
5193 	 * this value in the reset_before_init function to understand whether
5194 	 * we need to reset the chip before doing H/W init. This register is
5195 	 * cleared by the H/W upon H/W reset
5196 	 */
5197 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5198 
5199 	/* Perform read from the device to make sure device is up */
5200 	RREG32(mmHW_STATE);
5201 
5202 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5203 	 * So we set it here and if anyone tries to move it later to
5204 	 * a different address, there will be an error
5205 	 */
5206 	if (hdev->asic_prop.iatu_done_by_fw)
5207 		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5208 
5209 	/*
5210 	 * Before pushing u-boot/Linux to the device, we need to set the HBM BAR to
5211 	 * the DRAM base address
5212 	 */
5213 	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5214 		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5215 		return -EIO;
5216 	}
5217 
5218 	rc = gaudi2_init_cpu(hdev);
5219 	if (rc) {
5220 		dev_err(hdev->dev, "failed to initialize CPU\n");
5221 		return rc;
5222 	}
5223 
5224 	gaudi2_init_scrambler_hbm(hdev);
5225 	gaudi2_init_kdma(hdev);
5226 
5227 	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5228 	if (rc) {
5229 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5230 		return rc;
5231 	}
5232 
5233 	rc = gaudi2->cpucp_info_get(hdev);
5234 	if (rc) {
5235 		dev_err(hdev->dev, "Failed to get cpucp info\n");
5236 		return rc;
5237 	}
5238 
5239 	rc = gaudi2_mmu_init(hdev);
5240 	if (rc)
5241 		return rc;
5242 
5243 	gaudi2_init_pdma(hdev);
5244 	gaudi2_init_edma(hdev);
5245 	gaudi2_init_sm(hdev);
5246 	gaudi2_init_tpc(hdev);
5247 	gaudi2_init_mme(hdev);
5248 	gaudi2_init_rotator(hdev);
5249 	gaudi2_init_dec(hdev);
5250 	gaudi2_enable_timestamp(hdev);
5251 
5252 	rc = gaudi2_coresight_init(hdev);
5253 	if (rc)
5254 		goto disable_queues;
5255 
5256 	rc = gaudi2_enable_msix(hdev);
5257 	if (rc)
5258 		goto disable_queues;
5259 
5260 	/* Perform read from the device to flush all configuration */
5261 	RREG32(mmHW_STATE);
5262 
5263 	return 0;
5264 
5265 disable_queues:
5266 	gaudi2_disable_dma_qmans(hdev);
5267 	gaudi2_disable_mme_qmans(hdev);
5268 	gaudi2_disable_tpc_qmans(hdev);
5269 	gaudi2_disable_rot_qmans(hdev);
5270 	gaudi2_disable_nic_qmans(hdev);
5271 
5272 	gaudi2_disable_timestamp(hdev);
5273 
5274 	return rc;
5275 }
5276 
5277 /**
5278  * gaudi2_send_hard_reset_cmd - common function to handle reset
5279  *
5280  * @hdev: pointer to the habanalabs device structure
5281  *
5282  * This function handles the various possible scenarios for reset.
5283  * It considers whether the reset is handled by driver/FW and what FW components are loaded
5284  */
5285 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
5286 {
5287 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5288 	bool heartbeat_reset, preboot_only, cpu_initialized = false;
5289 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5290 	u32 cpu_boot_status;
5291 
5292 	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
5293 	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
5294 
5295 	/*
5296 	 * Handle the corner case where the failure occurred while loading the CPU
5297 	 * management app, yet the driver did not detect any failure while loading
5298 	 * the FW. In such a scenario the driver will send only HALT_MACHINE, and
5299 	 * no one will respond to this request since the FW is already back in
5300 	 * preboot and cannot handle such a command.
5301 	 * In this case, the next time the management app loads it will check the
5302 	 * events register, which will still hold the halt indication, and will
5303 	 * reboot the device. The solution is to let preboot clear all relevant
5304 	 * registers before the next boot, once the driver sends COMMS_RST_DEV.
5305 	 */
5306 	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
5307 
5308 	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
5309 			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
5310 		cpu_initialized = true;
5311 
5312 	/*
5313 	 * When Linux/Bootfit exists, this write to the SP can be interpreted in 2 ways:
5314 	 * 1. FW reset: FW initiates the reset sequence
5315 	 * 2. driver reset: FW will start the HALT sequence (the preparations for the
5316 	 *                  reset but not the reset itself, as it is not implemented
5317 	 *                  on its part) and LKD will wait to let FW complete the
5318 	 *                  sequence before issuing the reset
5319 	 */
5320 	if (!preboot_only && cpu_initialized) {
5321 		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
5322 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
5323 
5324 		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
5325 	}
5326 
5327 	/*
5328 	 * When working with preboot (without Linux/Bootfit) we can
5329 	 * communicate only using the COMMS commands to issue halt/reset.
5330 	 *
5331 	 * For the case in which we are working with Linux/Bootfit, this is a hail-mary
5332 	 * attempt to revive the card on the small chance that the f/w has
5333 	 * experienced a watchdog event, which caused it to return to preboot.
5334 	 * In that case, triggering the reset through the GIC won't help. We need to
5335 	 * trigger the reset as if Linux wasn't loaded.
5336 	 *
5337 	 * We do it only if the reset cause was a heartbeat (HB) failure, because that
5338 	 * would be the indication of such an event.
5339 	 *
5340 	 * In case the watchdog hasn't expired but we still got an HB failure, this
5341 	 * won't do any damage.
5342 	 */
5343 
5344 	if (heartbeat_reset || preboot_only || !cpu_initialized) {
5345 		if (hdev->asic_prop.hard_reset_done_by_fw)
5346 			hl_fw_ask_hard_reset_without_linux(hdev);
5347 		else
5348 			hl_fw_ask_halt_machine_without_linux(hdev);
5349 	}
5350 }
5351 
5352 /**
5353  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
5354  *
5355  * @hdev: pointer to the habanalabs device structure
5356  * @reset_sleep_ms: sleep time in msec after reset
5357  *
5358  * This function executes hard reset based on if driver/FW should do the reset
5359  */
5360 static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms)
5361 {
5362 	if (hdev->asic_prop.hard_reset_done_by_fw) {
5363 		gaudi2_send_hard_reset_cmd(hdev);
5364 		return;
5365 	}
5366 
5367 	/* Set the device to handle FLR by H/W, as we will put the device
5368 	 * CPU into halt mode
5369 	 */
5370 	WREG32(mmPCIE_AUX_FLR_CTRL,
5371 			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
5372 
5373 	gaudi2_send_hard_reset_cmd(hdev);
5374 
5375 	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
5376 }
5377 
5378 /**
5379  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
5380  *
5381  * @hdev: pointer to the habanalabs device structure
5382  * @reset_sleep_ms: sleep time in msec after reset
5383  * @driver_performs_reset: true if driver should perform reset instead of f/w.
5384  *
5385  * This function executes soft reset based on if driver/FW should do the reset
5386  */
5387 static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms,
5388 						bool driver_performs_reset)
5389 {
5390 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5391 
5392 	if (!driver_performs_reset) {
5393 		/* set SP to indicate reset request sent to FW */
5394 		if (dyn_regs->cpu_rst_status)
5395 			WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
5396 		else
5397 			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
5398 
5399 		WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
5400 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
5401 		return;
5402 	}
5403 
5404 	/* Block access to engines, QMANs and SM during reset; these
5405 	 * RRs will be reconfigured after the soft reset.
5406 	 * PCIE_MSIX is left unsecured to allow NIC packet processing during the reset.
5407 	 */
5408 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
5409 					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
5410 
5411 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
5412 				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
5413 				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
5414 
5415 	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
5416 }
5417 
5418 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
5419 								u32 poll_timeout_us)
5420 {
5421 	int i, rc = 0;
5422 	u32 reg_val;
5423 
5424 	/* Without this sleep, the reset will not work */
5425 	msleep(reset_sleep_ms);
5426 
5427 	/* We poll the BTM done indication multiple times after reset due to
5428 	 * HW erratum 'GAUDI2_0300'
5429 	 */
5430 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5431 		rc = hl_poll_timeout(
5432 			hdev,
5433 			mmPSOC_GLOBAL_CONF_BTM_FSM,
5434 			reg_val,
5435 			reg_val == 0,
5436 			1000,
5437 			poll_timeout_us);
5438 
5439 	if (rc)
5440 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
5441 }
5442 
5443 static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
5444 {
5445 	int i, rc = 0;
5446 	u32 reg_val;
5447 
5448 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5449 		rc = hl_poll_timeout(
5450 			hdev,
5451 			mmCPU_RST_STATUS_TO_HOST,
5452 			reg_val,
5453 			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
5454 			1000,
5455 			poll_timeout_us);
5456 
5457 	if (rc)
5458 		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
5459 				reg_val);
5460 }
5461 
5462 static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
5463 {
5464 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5465 	u32 poll_timeout_us, reset_sleep_ms;
5466 	bool driver_performs_reset = false;
5467 
5468 	if (hdev->pldm) {
5469 		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
5470 						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
5471 		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
5472 	} else {
5473 		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
5474 		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
5475 	}
5476 
5477 	if (fw_reset)
5478 		goto skip_reset;
5479 
5480 	gaudi2_reset_arcs(hdev);
5481 
5482 	if (hard_reset) {
5483 		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
5484 		gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
5485 	} else {
5486 		/*
5487 		 * As we also have to support working with preboot only (which does not support
5488 		 * soft reset), we have to make sure that security is disabled before letting the
5489 		 * driver do the reset. The user shall control the BFE flags to avoid requesting
5490 		 * a soft reset on a secured device with preboot only.
5491 		 */
5492 		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
5493 							!hdev->asic_prop.fw_security_enabled);
5494 		gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
5495 	}
5496 
5497 skip_reset:
5498 	if (driver_performs_reset || hard_reset)
5499 		/*
5500 		 * Instead of waiting for BTM indication we should wait for preboot ready:
5501 		 * Consider the below scenario:
5502 		 * 1. FW update is being triggered
5503 		 *        - setting the dirty bit
5504 		 * 2. hard reset will be triggered due to the dirty bit
5505 		 * 3. FW initiates the reset:
5506 		 *        - dirty bit cleared
5507 		 *        - BTM indication cleared
5508 		 *        - preboot ready indication cleared
5509 		 * 4. during hard reset:
5510 		 *        - BTM indication will be set
5511 		 *        - BIST test performed and another reset triggered
5512 		 * 5. only after this reset will preboot set the preboot ready indication
5513 		 *
5514 		 * When polling on the BTM indication alone, we can lose sync with FW while
5515 		 * trying to communicate with FW that is in the middle of a reset.
5516 		 * To overcome this, we always wait for the preboot ready indication.
5517 		 */
5518 		if ((hdev->fw_components & FW_TYPE_PREBOOT_CPU)) {
5519 			msleep(reset_sleep_ms);
5520 			hl_fw_wait_preboot_ready(hdev);
5521 		} else {
5522 			gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us);
5523 		}
5524 	else
5525 		gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
5526 
5527 	if (!gaudi2)
5528 		return;
5529 
5530 	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
5531 	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
5532 
5533 	/*
5534 	 * Clear NIC capability mask in order for driver to re-configure
5535 	 * NIC QMANs. NIC ports will not be re-configured during soft
5536 	 * reset as we call gaudi2_nic_init only during hard reset
5537 	 */
5538 	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
5539 
5540 	if (hard_reset) {
5541 		gaudi2->hw_cap_initialized &=
5542 			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
5543 			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
5544 			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
5545 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
5546 			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
5547 
5548 		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
5549 	} else {
5550 		gaudi2->hw_cap_initialized &=
5551 			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
5552 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
5553 			HW_CAP_ROT_MASK);
5554 	}
5555 }
5556 
5557 static int gaudi2_suspend(struct hl_device *hdev)
5558 {
5559 	int rc;
5560 
5561 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
5562 	if (rc)
5563 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
5564 
5565 	return rc;
5566 }
5567 
5568 static int gaudi2_resume(struct hl_device *hdev)
5569 {
5570 	return gaudi2_init_iatu(hdev);
5571 }
5572 
5573 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
5574 		void *cpu_addr, dma_addr_t dma_addr, size_t size)
5575 {
5576 	int rc;
5577 
5578 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
5579 			VM_DONTCOPY | VM_NORESERVE;
5580 
5581 #ifdef _HAS_DMA_MMAP_COHERENT
5582 
5583 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
5584 	if (rc)
5585 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
5586 
5587 #else
5588 
5589 	rc = remap_pfn_range(vma, vma->vm_start,
5590 				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
5591 				size, vma->vm_page_prot);
5592 	if (rc)
5593 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
5594 
5595 #endif
5596 
5597 	return rc;
5598 }
5599 
5600 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
5601 {
5602 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5603 	u64 hw_cap_mask = 0;
5604 	u64 hw_tpc_cap_bit = 0;
5605 	u64 hw_nic_cap_bit = 0;
5606 	u64 hw_test_cap_bit = 0;
5607 
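	/* Each engine exposes 4 consecutive H/W queues, so the ">> 2" below maps a
	 * queue-ID delta to its engine index when computing the capability bit.
	 */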
5608 	switch (hw_queue_id) {
5609 	case GAUDI2_QUEUE_ID_PDMA_0_0:
5610 	case GAUDI2_QUEUE_ID_PDMA_0_1:
5611 	case GAUDI2_QUEUE_ID_PDMA_1_0:
5612 		hw_cap_mask = HW_CAP_PDMA_MASK;
5613 		break;
5614 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
5615 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
5616 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
5617 		break;
5618 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
5619 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
5620 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
5621 		break;
5622 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
5623 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
5624 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
5625 		break;
5626 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
5627 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
5628 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
5629 		break;
5630 
5631 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
5632 		hw_test_cap_bit = HW_CAP_MME_SHIFT;
5633 		break;
5634 
5635 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
5636 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
5637 		break;
5638 
5639 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
5640 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
5641 		break;
5642 
5643 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
5644 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
5645 		break;
5646 
5647 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
5648 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
5649 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
5650 
5651 		/* special case where cap bit refers to the first queue id */
5652 		if (!hw_tpc_cap_bit)
5653 			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
5654 		break;
5655 
5656 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
5657 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
5658 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
5659 		break;
5660 
5661 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
5662 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
5663 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
5664 		break;
5665 
5666 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
5667 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
5668 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
5669 		break;
5670 
5671 	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
5672 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
5673 		break;
5674 
5675 	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
5676 		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
5677 		break;
5678 
5679 	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
5680 		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
5681 
5682 		/* special case where cap bit refers to the first queue id */
5683 		if (!hw_nic_cap_bit)
5684 			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
5685 		break;
5686 
5687 	case GAUDI2_QUEUE_ID_CPU_PQ:
5688 		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
5689 
5690 	default:
5691 		return false;
5692 	}
5693 
5694 	if (hw_tpc_cap_bit)
5695 		return  !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
5696 
5697 	if (hw_nic_cap_bit)
5698 		return  !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
5699 
5700 	if (hw_test_cap_bit)
5701 		hw_cap_mask = BIT_ULL(hw_test_cap_bit);
5702 
5703 	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
5704 }
5705 
5706 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
5707 {
5708 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5709 
5710 	switch (arc_id) {
5711 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5712 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5713 		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
5714 
5715 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5716 		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5717 
5718 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5719 		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5720 
5721 	default:
5722 		return false;
5723 	}
5724 }
5725 
5726 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5727 {
5728 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5729 
5730 	switch (arc_id) {
5731 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5732 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5733 		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
5734 		break;
5735 
5736 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5737 		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5738 		break;
5739 
5740 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5741 		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5742 		break;
5743 
5744 	default:
5745 		return;
5746 	}
5747 }
5748 
5749 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5750 {
5751 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5752 
5753 	switch (arc_id) {
5754 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5755 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5756 		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
5757 		break;
5758 
5759 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5760 		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
5761 		break;
5762 
5763 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5764 		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
5765 		break;
5766 
5767 	default:
5768 		return;
5769 	}
5770 }
5771 
5772 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
5773 {
5774 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5775 	u32 pq_offset, reg_base, db_reg_offset, db_value;
5776 
5777 	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
5778 		/*
5779 		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
5780 		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
5781 		 * number.
5782 		 */
5783 		pq_offset = (hw_queue_id & 0x3) * 4;
5784 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
5785 		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
5786 	} else {
5787 		db_reg_offset = mmCPU_IF_PF_PQ_PI;
5788 	}
5789 
5790 	db_value = pi;
5791 
5792 	/* ring the doorbell */
5793 	WREG32(db_reg_offset, db_value);
5794 
5795 	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
5796 		/* make sure device CPU will read latest data from host */
5797 		mb();
5798 		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
5799 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
5800 	}
5801 }
5802 
5803 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
5804 {
5805 	__le64 *pbd = (__le64 *) bd;
5806 
5807 	/* The QMAN PQs are on host memory, so a simple copy suffices */
5808 	pqe[0] = pbd[0];
5809 	pqe[1] = pbd[1];
5810 }
5811 
5812 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
5813 				dma_addr_t *dma_handle, gfp_t flags)
5814 {
5815 	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
5816 }
5817 
5818 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
5819 				void *cpu_addr, dma_addr_t dma_handle)
5820 {
5821 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
5822 }
5823 
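/*
 * Send a message to the device CPU over the CPU PQ. If the CPU queue
 * capability is not set yet, return success with a zeroed result. A zero
 * timeout selects the default GAUDI2_MSG_TO_CPU_TIMEOUT_USEC.
 */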
5824 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
5825 				u32 timeout, u64 *result)
5826 {
5827 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5828 
5829 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
5830 		if (result)
5831 			*result = 0;
5832 		return 0;
5833 	}
5834 
5835 	if (!timeout)
5836 		timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
5837 
5838 	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
5839 }
5840 
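/*
 * Allocate a zeroed block from the device DMA pool. The pool serves blocks
 * of up to GAUDI2_DMA_POOL_BLK_SIZE bytes, so larger requests are rejected.
 */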
5841 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5842 				gfp_t mem_flags, dma_addr_t *dma_handle)
5843 {
5844 	if (size > GAUDI2_DMA_POOL_BLK_SIZE)
5845 		return NULL;
5846 
5847 	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5848 }
5849 
5850 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
5851 {
5852 	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
5853 }
5854 
5855 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
5856 						dma_addr_t *dma_handle)
5857 {
5858 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5859 }
5860 
5861 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
5862 {
5863 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5864 }
5865 
5866 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
5867 					enum dma_data_direction dir)
5868 {
5869 	dma_addr_t dma_addr;
5870 
5871 	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
5872 	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
5873 		return 0;
5874 
5875 	return dma_addr;
5876 }
5877 
5878 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
5879 					enum dma_data_direction dir)
5880 {
5881 	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
5882 }
5883 
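/*
 * A user CB that targets an internal QMAN is accepted only if it fully
 * resides in one of the known ranges: SRAM, DRAM, the DMMU/PMMU virtual
 * address ranges (when the relevant MMU is initialized), or a valid host
 * physical address when the device is not behind an IOMMU.
 */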
5884 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
5885 {
5886 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5887 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5888 
5889 	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
5890 		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5891 		return -EINVAL;
5892 	}
5893 
5894 	/* Just check if CB address is valid */
5895 
5896 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5897 					parser->user_cb_size,
5898 					asic_prop->sram_user_base_address,
5899 					asic_prop->sram_end_address))
5900 		return 0;
5901 
5902 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5903 					parser->user_cb_size,
5904 					asic_prop->dram_user_base_address,
5905 					asic_prop->dram_end_address))
5906 		return 0;
5907 
5908 	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
5909 		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5910 						parser->user_cb_size,
5911 						asic_prop->dmmu.start_addr,
5912 						asic_prop->dmmu.end_addr))
5913 		return 0;
5914 
5915 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
5916 		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5917 					parser->user_cb_size,
5918 					asic_prop->pmmu.start_addr,
5919 					asic_prop->pmmu.end_addr) ||
5920 			hl_mem_area_inside_range(
5921 					(u64) (uintptr_t) parser->user_cb,
5922 					parser->user_cb_size,
5923 					asic_prop->pmmu_huge.start_addr,
5924 					asic_prop->pmmu_huge.end_addr))
5925 			return 0;
5926 
5927 	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
5928 		if (!hdev->pdev)
5929 			return 0;
5930 
5931 		if (!device_iommu_mapped(&hdev->pdev->dev))
5932 			return 0;
5933 	}
5934 
5935 	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
5936 		parser->user_cb, parser->user_cb_size);
5937 
5938 	return -EFAULT;
5939 }
5940 
5941 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5942 {
5943 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5944 
5945 	if (!parser->is_kernel_allocated_cb)
5946 		return gaudi2_validate_cb_address(hdev, parser);
5947 
5948 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5949 		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
5950 		return -EINVAL;
5951 	}
5952 
5953 	return 0;
5954 }
5955 
5956 static int gaudi2_send_heartbeat(struct hl_device *hdev)
5957 {
5958 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5959 
5960 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
5961 		return 0;
5962 
5963 	return hl_fw_send_heartbeat(hdev);
5964 }
5965 
5966 /* This is an internal helper function, used to update the KDMA MMU properties
5967  * (MMU bypass and ASID). Should be called while holding the KDMA lock.
5968  */
5969 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
5970 					   bool mmu_bypass, u32 asid)
5971 {
5972 	u32 rw_asid, rw_mmu_bp;
5973 
5974 	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
5975 		      (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
5976 
5977 	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
5978 			(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
5979 
5980 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
5981 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
5982 }
5983 
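/*
 * Arm a sync-manager monitor on a single SOB: the SOB is cleared and the
 * monitor is configured to post 'mon_payload' to completion queue 'cq_id'
 * once the SOB value equals 'sync_value'.
 */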
5984 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
5985 						u32 mon_payload, u32 sync_value)
5986 {
5987 	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
5988 	u8 mask;
5989 
5990 	sob_offset = sob_id * 4;
5991 	mon_offset = mon_id * 4;
5992 
5993 	/* Reset the SOB value */
5994 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5995 
5996 	/* Since CQ_EN is set, the monitor payload address field holds the CQ ID */
5997 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
5998 
5999 	/* Since CQ_EN is set, the payload data field holds the value posted to the CQ (the CS index) */
6000 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6001 
6002 	sync_group_id = sob_id / 8;
6003 	mask = ~(1 << (sob_id & 0x7));
6004 	mode = 1; /* comparison mode is "equal to" */
6005 
6006 	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6007 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6008 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6009 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6010 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6011 }
6012 
6013 /* Submit a copy/memset job to the KDMA engine and busy-poll for its completion */
6014 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6015 					u64 src_addr, u64 dst_addr,
6016 					u32 size, bool is_memset)
6017 {
6018 	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6019 	struct hl_cq_entry *cq_base;
6020 	struct hl_cq *cq;
6021 	u64 comp_addr;
6022 	int rc;
6023 
6024 	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6025 				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6026 				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6027 
6028 	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6029 			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6030 
6031 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6032 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6033 
6034 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6035 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6036 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6037 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6038 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6039 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6040 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6041 	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6042 
6043 	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6044 				FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6045 
6046 	if (is_memset)
6047 		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6048 
6049 	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6050 
6051 	/* Wait for completion */
6052 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6053 	cq_base = cq->kernel_address;
6054 	polling_addr = (u32 *)&cq_base[cq->ci];
6055 
6056 	if (hdev->pldm)
6057 		/* add 20 seconds of timeout for each 1MB of transfer */
6058 		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6059 	else
6060 		timeout = KDMA_TIMEOUT_USEC;
6061 
6062 	/* Polling */
6063 	rc = hl_poll_timeout_memory(
6064 			hdev,
6065 			polling_addr,
6066 			status,
6067 			(status == 1),
6068 			1000,
6069 			timeout,
6070 			true);
6071 
6072 	*polling_addr = 0;
6073 
6074 	if (rc) {
6075 		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6076 		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6077 		return rc;
6078 	}
6079 
6080 	cq->ci = hl_cq_inc_ptr(cq->ci);
6081 
6082 	return 0;
6083 }
6084 
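/* Fill a device region with a 32-bit value using LBW register writes, 4 bytes at a time */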
6085 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6086 {
6087 	u32 i;
6088 
6089 	for (i = 0 ; i < size ; i += sizeof(u32))
6090 		WREG32(addr + i, val);
6091 }
6092 
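/*
 * Toggle a QMAN between test mode (trusted test protection, PQC disabled)
 * and its normal operational settings.
 */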
6093 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6094 {
6095 	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6096 
6097 	if (enable) {
6098 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6099 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6100 	} else {
6101 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6102 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6103 	}
6104 }
6105 
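/*
 * Sanity-test a H/W queue: send a MSG_SHORT packet that writes a known
 * value to a SOB and poll the SOB until the value shows up or the timeout
 * expires.
 */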
6106 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
6107 {
6108 	u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
6109 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6110 	u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a;
6111 	struct packet_msg_short *msg_short_pkt;
6112 	dma_addr_t pkt_dma_addr;
6113 	size_t pkt_size;
6114 	int rc;
6115 
6116 	if (hdev->pldm)
6117 		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6118 	else
6119 		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6120 
6121 	pkt_size = sizeof(*msg_short_pkt);
6122 	msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr);
6123 	if (!msg_short_pkt) {
6124 		dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n",
6125 			hw_queue_id);
6126 		return -ENOMEM;
6127 	}
6128 
6129 	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6130 		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6131 		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6132 		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6133 		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6134 
6135 	msg_short_pkt->value = cpu_to_le32(sob_val);
6136 	msg_short_pkt->ctl = cpu_to_le32(tmp);
6137 
6138 	/* Reset the SOB value */
6139 	WREG32(sob_addr, 0);
6140 
6141 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
6142 	if (rc) {
6143 		dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n",
6144 			hw_queue_id);
6145 		goto free_pkt;
6146 	}
6147 
6148 	rc = hl_poll_timeout(
6149 			hdev,
6150 			sob_addr,
6151 			tmp,
6152 			(tmp == sob_val),
6153 			1000,
6154 			timeout_usec);
6155 
6156 	if (rc == -ETIMEDOUT) {
6157 		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6158 			hw_queue_id, tmp);
6159 		rc = -EIO;
6160 	}
6161 
6162 	/* Reset the SOB value */
6163 	WREG32(sob_addr, 0);
6164 
6165 free_pkt:
6166 	hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr);
6167 	return rc;
6168 }
6169 
6170 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6171 {
6172 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6173 
6174 	/*
6175 	 * check the capability here because send_cpu_message() won't update the
6176 	 * result value when the CPU queue capability is not set
6177 	 */
6178 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6179 		return 0;
6180 
6181 	return hl_fw_test_cpu_queue(hdev);
6182 }
6183 
6184 static int gaudi2_test_queues(struct hl_device *hdev)
6185 {
6186 	int i, rc, ret_val = 0;
6187 
6188 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6189 		if (!gaudi2_is_queue_enabled(hdev, i))
6190 			continue;
6191 
6192 		gaudi2_qman_set_test_mode(hdev, i, true);
6193 		rc = gaudi2_test_queue(hdev, i);
6194 		gaudi2_qman_set_test_mode(hdev, i, false);
6195 
6196 		if (rc) {
6197 			ret_val = -EINVAL;
6198 			goto done;
6199 		}
6200 	}
6201 
6202 	rc = gaudi2_test_cpu_queue(hdev);
6203 	if (rc) {
6204 		ret_val = -EINVAL;
6205 		goto done;
6206 	}
6207 
6208 done:
6209 	return ret_val;
6210 }
6211 
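/*
 * Late initialization steps that must be re-done after a compute reset:
 * re-init the ARCs, scrub their DCCM, re-apply security settings and
 * unmask the F/W events.
 */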
6212 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6213 {
6214 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6215 	size_t irq_arr_size;
6216 
6217 	/* TODO: missing gaudi2_nic_resume.
6218 	 * Until it is implemented, nic_hw_cap_initialized will remain zeroed
6219 	 */
6220 	gaudi2_init_arcs(hdev);
6221 	gaudi2_scrub_arcs_dccm(hdev);
6222 	gaudi2_init_security(hdev);
6223 
6224 	/* Unmask all IRQs since some could have been received during the soft reset */
6225 	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
6226 	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
6227 }
6228 
6229 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
6230 					struct iterate_module_ctx *ctx)
6231 {
6232 	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
6233 	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
6234 	bool is_eng_idle;
6235 	int engine_idx;
6236 
6237 	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
6238 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
6239 	else
6240 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
6241 				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
6242 
6243 	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
6244 	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
6245 	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
6246 	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
6247 
6248 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6249 						IS_TPC_IDLE(tpc_cfg_sts);
6250 	*(idle_data->is_idle) &= is_eng_idle;
6251 
6252 	if (idle_data->mask && !is_eng_idle)
6253 		set_bit(engine_idx, idle_data->mask);
6254 
6255 	if (idle_data->e)
6256 		hl_engine_data_sprintf(idle_data->e,
6257 					idle_data->tpc_fmt, dcore, inst,
6258 					is_eng_idle ? "Y" : "N",
6259 					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6260 }
6261 
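/*
 * Walk over all engines (EDMA, PDMA, NIC, MME, TPC, decoders and rotators),
 * read their idle indications and aggregate them. Busy engines are recorded
 * in the optional bitmask and, when a buffer is provided, printed in a
 * per-engine-type table.
 */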
6262 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
6263 					struct engines_data *e)
6264 {
6265 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask,
6266 		mme_arch_sts, dec_swreg15, dec_enabled_bit;
6267 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6268 	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n";
6269 	unsigned long *mask = (unsigned long *) mask_arr;
6270 	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n";
6271 	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
6272 	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
6273 	const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n";
6274 	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
6275 	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
6276 	bool is_idle = true, is_eng_idle;
6277 	u64 offset;
6278 
6279 	struct gaudi2_tpc_idle_data tpc_idle_data = {
6280 		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
6281 		.e = e,
6282 		.mask = mask,
6283 		.is_idle = &is_idle,
6284 	};
6285 	struct iterate_module_ctx tpc_iter = {
6286 		.fn = &gaudi2_is_tpc_engine_idle,
6287 		.data = &tpc_idle_data,
6288 	};
6289 
6290 	int engine_idx, i, j;
6291 
6292 	/* EDMA, Two engines per Dcore */
6293 	if (e)
6294 		hl_engine_data_sprintf(e,
6295 			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6296 			"----  ----  -------  ------------  ----------------------\n");
6297 
6298 	for (i = 0; i < NUM_OF_DCORES; i++) {
6299 		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
6300 			int seq = i * NUM_OF_EDMA_PER_DCORE + j;
6301 
6302 			if (!(prop->edma_enabled_mask & BIT(seq)))
6303 				continue;
6304 
6305 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
6306 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6307 			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
6308 
6309 			dma_core_idle_ind_mask =
6310 			RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset);
6311 
6312 			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
6313 			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
6314 			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
6315 
6316 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6317 					IS_DMA_IDLE(dma_core_idle_ind_mask);
6318 			is_idle &= is_eng_idle;
6319 
6320 			if (mask && !is_eng_idle)
6321 				set_bit(engine_idx, mask);
6322 
6323 			if (e)
6324 				hl_engine_data_sprintf(e, edma_fmt, i, j,
6325 							is_eng_idle ? "Y" : "N",
6326 							qm_glbl_sts0,
6327 							dma_core_idle_ind_mask);
6328 		}
6329 	}
6330 
6331 	/* PDMA, Two engines in Full chip */
6332 	if (e)
6333 		hl_engine_data_sprintf(e,
6334 					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6335 					"----  -------  ------------  ----------------------\n");
6336 
6337 	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
6338 		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
6339 		offset = i * PDMA_OFFSET;
6340 		dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset);
6341 
6342 		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
6343 		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
6344 		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
6345 
6346 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6347 				IS_DMA_IDLE(dma_core_idle_ind_mask);
6348 		is_idle &= is_eng_idle;
6349 
6350 		if (mask && !is_eng_idle)
6351 			set_bit(engine_idx, mask);
6352 
6353 		if (e)
6354 			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
6355 						qm_glbl_sts0, dma_core_idle_ind_mask);
6356 	}
6357 
6358 	/* NIC, twelve macros in Full chip */
6359 	if (e && hdev->nic_ports_mask)
6360 		hl_engine_data_sprintf(e,
6361 					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
6362 					"---  -------  ------------  ----------\n");
6363 
6364 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6365 		if (!(i & 1))
6366 			offset = i / 2 * NIC_OFFSET;
6367 		else
6368 			offset += NIC_QM_OFFSET;
6369 
6370 		if (!(hdev->nic_ports_mask & BIT(i)))
6371 			continue;
6372 
6373 		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
6374 
6376 		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
6377 		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
6378 		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
6379 
6380 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6381 		is_idle &= is_eng_idle;
6382 
6383 		if (mask && !is_eng_idle)
6384 			set_bit(engine_idx, mask);
6385 
6386 		if (e)
6387 			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
6388 						qm_glbl_sts0, qm_cgm_sts);
6389 	}
6390 
6391 	if (e)
6392 		hl_engine_data_sprintf(e,
6393 					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
6394 					"---  ----  -------  ------------  ---------------\n");
6395 	/* MME, one per Dcore */
6396 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6397 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
6398 		offset = i * DCORE_OFFSET;
6399 
6400 		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
6401 		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
6402 		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
6403 
6404 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6405 		is_idle &= is_eng_idle;
6406 
6407 		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
6408 		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
6409 		is_idle &= is_eng_idle;
6410 
6411 		if (e)
6412 			hl_engine_data_sprintf(e, mme_fmt, i, "N",
6413 				is_eng_idle ? "Y" : "N",
6414 				qm_glbl_sts0,
6415 				mme_arch_sts);
6416 
6417 		if (mask && !is_eng_idle)
6418 			set_bit(engine_idx, mask);
6419 	}
6420 
6421 	/*
6422 	 * TPC
6423 	 */
6424 	if (e && prop->tpc_enabled_mask)
6425 		hl_engine_data_sprintf(e,
6426 			"\nCORE  TPC   is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
6427 			"----  ---  --------  ------------  ----------  ----------\n");
6428 
6429 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
6430 
6431 	/* Decoders, two per Dcore and two shared PCIe decoders */
6432 	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
6433 		hl_engine_data_sprintf(e,
6434 			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
6435 			"----  ---  -------  ---------------\n");
6436 
6437 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6438 		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
6439 			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
6440 			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
6441 				continue;
6442 
6443 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
6444 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6445 			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
6446 
6447 			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
6448 			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6449 			is_idle &= is_eng_idle;
6450 
6451 			if (mask && !is_eng_idle)
6452 				set_bit(engine_idx, mask);
6453 
6454 			if (e)
6455 				hl_engine_data_sprintf(e, dec_fmt, i, j,
6456 							is_eng_idle ? "Y" : "N", dec_swreg15);
6457 		}
6458 	}
6459 
6460 	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
6461 		hl_engine_data_sprintf(e,
6462 			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
6463 			"--------  -------  ---------------\n");
6464 
6465 	/* Check shared(PCIe) decoders */
6466 	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
6467 		dec_enabled_bit = PCIE_DEC_SHIFT + i;
6468 		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
6469 			continue;
6470 
6471 		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
6472 		offset = i * DCORE_DEC_OFFSET;
6473 		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
6474 		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6475 		is_idle &= is_eng_idle;
6476 
6477 		if (mask && !is_eng_idle)
6478 			set_bit(engine_idx, mask);
6479 
6480 		if (e)
6481 			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
6482 						is_eng_idle ? "Y" : "N", dec_swreg15);
6483 	}
6484 
6485 	if (e)
6486 		hl_engine_data_sprintf(e,
6487 			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6488 			"----  ----  -------  ------------  ----------  -------------\n");
6489 
6490 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
6491 		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
6492 
6493 		offset = i * ROT_OFFSET;
6494 
6495 		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
6496 		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
6497 		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
6498 
6499 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6500 		is_idle &= is_eng_idle;
6501 
6502 		if (mask && !is_eng_idle)
6503 			set_bit(engine_idx, mask);
6504 
6505 		if (e)
6506 			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
6507 					qm_glbl_sts0, qm_cgm_sts, "-");
6508 	}
6509 
6510 	return is_idle;
6511 }
6512 
6513 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
6514 	__acquires(&gaudi2->hw_queues_lock)
6515 {
6516 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6517 
6518 	spin_lock(&gaudi2->hw_queues_lock);
6519 }
6520 
6521 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
6522 	__releases(&gaudi2->hw_queues_lock)
6523 {
6524 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6525 
6526 	spin_unlock(&gaudi2->hw_queues_lock);
6527 }
6528 
6529 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
6530 {
6531 	return hdev->pdev->device;
6532 }
6533 
6534 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
6535 {
6536 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6537 
6538 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6539 		return 0;
6540 
6541 	return hl_fw_get_eeprom_data(hdev, data, max_size);
6542 }
6543 
6544 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
6545 {
6546 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
6547 }
6548 
6549 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
6550 {
6551 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6552 
6553 	if (aggregate) {
6554 		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
6555 		return gaudi2->events_stat_aggregate;
6556 	}
6557 
6558 	*size = (u32) sizeof(gaudi2->events_stat);
6559 	return gaudi2->events_stat;
6560 }
6561 
6562 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
6563 				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6564 {
6565 	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
6566 			dcore_vdec_id + DCORE_OFFSET * dcore_id;
6567 
6568 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6569 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6570 
6571 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6572 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6573 
6574 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6575 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6576 
6577 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6578 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6579 
6580 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6581 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6582 }
6583 
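/*
 * Program the ASID (with MMU bypass cleared) in the AXUSER registers of all
 * the engines that belong to a single Dcore: EDMAs, sync manager, MME
 * (including its SBTE/WB ports and QMAN) and the enabled decoders.
 */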
6584 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
6585 {
6586 	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6587 			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6588 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6589 	u32 dcore_offset = dcore_id * DCORE_OFFSET;
6590 	u32 vdec_id, i, ports_offset, reg_val;
6591 	u8 edma_seq_base;
6592 
6593 	/* EDMA */
6594 	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
6595 	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
6596 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6597 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6598 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6599 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6600 	}
6601 
6602 	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
6603 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6604 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6605 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6606 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6607 	}
6608 
6609 	/* Sync Mngr */
6610 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
6611 	/*
6612 	 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
6613 	 * for any access type
6614 	 */
6615 	if (dcore_id > 0) {
6616 		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
6617 			  (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
6618 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
6619 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
6620 	}
6621 
6622 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
6623 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
6624 
6625 	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
6626 		ports_offset = i * DCORE_MME_SBTE_OFFSET;
6627 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
6628 				dcore_offset + ports_offset, 0);
6629 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
6630 				dcore_offset + ports_offset, rw_asid);
6631 	}
6632 
6633 	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
6634 		ports_offset = i * DCORE_MME_WB_OFFSET;
6635 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
6636 				dcore_offset + ports_offset, 0);
6637 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
6638 				dcore_offset + ports_offset, rw_asid);
6639 	}
6640 
6641 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6642 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6643 
6644 	/*
6645 	 * Decoders
6646 	 */
6647 	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
6648 		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
6649 			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
6650 	}
6651 }
6652 
6653 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
6654 				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6655 {
6656 	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
6657 
6658 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6659 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6660 
6661 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6662 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6663 
6664 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6665 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6666 
6667 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6668 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6669 
6670 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6671 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6672 }
6673 
6674 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
6675 							u32 rw_asid, u32 rw_mmu_bp)
6676 {
6677 	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
6678 
6679 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
6680 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
6681 }
6682 
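/*
 * Enable the MMU (bypass cleared) and set the ASID in the region config
 * registers of a single ARC CPU, for all the relevant ARC address regions.
 */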
6683 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
6684 {
6685 	u32 reg_base, reg_offset, reg_val = 0;
6686 
6687 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
6688 
6689 	/* Enable MMU and configure asid for all relevant ARC regions */
6690 	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
6691 	reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
6692 
6693 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
6694 	WREG32(reg_base + reg_offset, reg_val);
6695 
6696 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
6697 	WREG32(reg_base + reg_offset, reg_val);
6698 
6699 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
6700 	WREG32(reg_base + reg_offset, reg_val);
6701 
6702 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
6703 	WREG32(reg_base + reg_offset, reg_val);
6704 
6705 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
6706 	WREG32(reg_base + reg_offset, reg_val);
6707 
6708 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
6709 	WREG32(reg_base + reg_offset, reg_val);
6710 
6711 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
6712 	WREG32(reg_base + reg_offset, reg_val);
6713 
6714 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
6715 	WREG32(reg_base + reg_offset, reg_val);
6716 
6717 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
6718 	WREG32(reg_base + reg_offset, reg_val);
6719 
6720 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
6721 	WREG32(reg_base + reg_offset, reg_val);
6722 
6723 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
6724 	WREG32(reg_base + reg_offset, reg_val);
6725 }
6726 
6727 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
6728 {
6729 	int i;
6730 
6731 	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
6732 		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
6733 
6734 	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6735 		gaudi2_arc_mmu_prepare(hdev, i, asid);
6736 
6737 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
6738 		if (!gaudi2_is_queue_enabled(hdev, i))
6739 			continue;
6740 
6741 		gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
6742 	}
6743 
6744 	return 0;
6745 }
6746 
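/*
 * Program the ASID (with MMU bypass cleared) for the engines that are shared
 * across the whole chip: PDMAs, rotators, the shared PCIe decoders, the ARC
 * farm DUP engines and the ARC CPUs themselves.
 */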
6747 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
6748 {
6749 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6750 	u32 rw_asid, offset;
6751 	int rc, i;
6752 
6753 	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
6754 			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
6755 
6756 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6757 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6758 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6759 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6760 
6761 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6762 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6763 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6764 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6765 
6766 	/* ROT */
6767 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
6768 		offset = i * ROT_OFFSET;
6769 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
6770 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6771 		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
6772 		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
6773 		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
6774 	}
6775 
6776 	/* Shared Decoders are the last bits in the decoders mask */
6777 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
6778 		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
6779 
6780 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
6781 		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
6782 
6783 	/* arc farm arc dup eng */
6784 	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6785 		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
6786 
6787 	rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
6788 	if (rc)
6789 		return rc;
6790 
6791 	return 0;
6792 }
6793 
6794 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst,	u32 offset,
6795 					struct iterate_module_ctx *ctx)
6796 {
6797 	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
6798 
6799 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
6800 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
6801 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6802 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
6803 }
6804 
6805 /* zero the MMUBP and set the ASID */
6806 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
6807 {
6808 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6809 	struct gaudi2_tpc_mmu_data tpc_mmu_data;
6810 	struct iterate_module_ctx tpc_iter = {
6811 		.fn = &gaudi2_tpc_mmu_prepare,
6812 		.data = &tpc_mmu_data,
6813 	};
6814 	int rc, i;
6815 
6816 	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
6817 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6818 		return -EINVAL;
6819 	}
6820 
6821 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
6822 		return 0;
6823 
6824 	rc = gaudi2_mmu_shared_prepare(hdev, asid);
6825 	if (rc)
6826 		return rc;
6827 
6828 	/* configure DCORE MMUs */
6829 	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6830 				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6831 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
6832 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
6833 		gaudi2_mmu_dcore_prepare(hdev, i, asid);
6834 
6835 	return 0;
6836 }
6837 
6838 static inline bool is_info_event(u32 event)
6839 {
6840 	switch (event) {
6841 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
6842 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
6843 
6844 	/* return in case of NIC status event - these events are received periodically and not as
6845 	 * an indication of an error.
6846 	 */
6847 	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
6848 		return true;
6849 	default:
6850 		return false;
6851 	}
6852 }
6853 
6854 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
6855 			bool ratelimited, const char *fmt, ...)
6856 {
6857 	struct va_format vaf;
6858 	va_list args;
6859 
6860 	va_start(args, fmt);
6861 	vaf.fmt = fmt;
6862 	vaf.va = &args;
6863 
6864 	if (ratelimited)
6865 		dev_err_ratelimited(hdev->dev, "%s: %pV\n",
6866 			gaudi2_irq_map_table[event_type].valid ?
6867 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
6868 	else
6869 		dev_err(hdev->dev, "%s: %pV\n",
6870 			gaudi2_irq_map_table[event_type].valid ?
6871 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
6872 
6873 	va_end(args);
6874 }
6875 
6876 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6877 		struct hl_eq_ecc_data *ecc_data)
6878 {
6879 	u64 ecc_address = 0, ecc_syndrom = 0;
6880 	u8 memory_wrapper_idx = 0;
6881 
6882 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
6883 	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6884 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6885 
6886 	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
6887 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u. critical %u.\n",
6888 		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
6889 
6890 	return !!ecc_data->is_critical;
6891 }
6892 
6893 /*
6894  * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6895  *
6896  * @idx: the current pi/ci value
6897  * @q_len: the queue length (power of 2)
6898  *
6899  * @return the cyclically decremented index
6900  */
6901 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
6902 {
6903 	u32 mask = q_len - 1;
6904 
6905 	/*
6906 	 * a modular decrement is equivalent to adding (q_len - 1);
6907 	 * masking with (q_len - 1) then keeps the result in the
6908 	 * range [0, q_len - 1]
6909 	 */
6910 	return (idx + q_len - 1) & mask;
6911 }
6912 
6913 /**
6914  * gaudi2_print_sw_config_stream_data - print SW config stream data
6915  *
6916  * @hdev: pointer to the habanalabs device structure
6917  * @stream: the QMAN's stream
6918  * @qman_base: base address of QMAN registers block
6919  */
6920 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
6921 						u32 stream, u64 qman_base)
6922 {
6923 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6924 	u32 cq_ptr_lo_off, size;
6925 
6926 	cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
6927 
6928 	cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
6929 									stream * cq_ptr_lo_off;
6930 
6931 	cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6932 
6933 	cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6934 
6935 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6936 	size = RREG32(cq_tsize);
6937 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
6938 		stream, cq_ptr, size);
6939 }
6940 
6941 /**
6942  * gaudi2_print_last_pqes_on_err - print last PQEs on error
6943  *
6944  * @hdev: pointer to the habanalabs device structure
6945  * @qid_base: first QID of the QMAN (out of 4 streams)
6946  * @stream: the QMAN's stream
6947  * @qman_base: base address of QMAN registers block
6948  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6949  */
6950 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
6951 						u64 qman_base, bool pr_sw_conf)
6952 {
6953 	u32 ci, qm_ci_stream_off;
6954 	struct hl_hw_queue *q;
6955 	u64 pq_ci;
6956 	int i;
6957 
6958 	q = &hdev->kernel_queues[qid_base + stream];
6959 
6960 	qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
6961 	pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
6962 						stream * qm_ci_stream_off;
6963 
6964 	hdev->asic_funcs->hw_queues_lock(hdev);
6965 
6966 	if (pr_sw_conf)
6967 		gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
6968 
6969 	ci = RREG32(pq_ci);
6970 
6971 	/* we should start printing from ci - 1 */
6972 	ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6973 
6974 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6975 		struct hl_bd *bd;
6976 		u64 addr;
6977 		u32 len;
6978 
6979 		bd = q->kernel_address;
6980 		bd += ci;
6981 
6982 		len = le32_to_cpu(bd->len);
6983 		/* len 0 means an uninitialized entry - break */
6984 		if (!len)
6985 			break;
6986 
6987 		addr = le64_to_cpu(bd->ptr);
6988 
6989 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
6990 			stream, ci, addr, len);
6991 
6992 		/* get previous ci, wrap if needed */
6993 		ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6994 	}
6995 
6996 	hdev->asic_funcs->hw_queues_unlock(hdev);
6997 }
6998 
6999 /**
7000  * print_qman_data_on_err - extract QMAN data on error
7001  *
7002  * @hdev: pointer to the habanalabs device structure
7003  * @qid_base: first QID of the QMAN (out of 4 streams)
7004  * @stream: the QMAN's stream
7005  * @qman_base: base address of QMAN registers block
7006  *
7007  * This function attempts to extract as much data as possible on a QMAN error.
7008  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7009  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7010  */
7011 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
7012 {
7013 	u32 i;
7014 
7015 	if (stream != QMAN_STREAMS) {
7016 		gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
7017 		return;
7018 	}
7019 
7020 	gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7021 
7022 	for (i = 0 ; i < QMAN_STREAMS ; i++)
7023 		gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
7024 }
7025 
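/*
 * Scan the GLBL_ERR_STS register of each of the 4 streams and the lower CP,
 * as well as the ARB error cause register, print every error cause that is
 * set and return the total number of errors found.
 */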
7026 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7027 							u64 qman_base, u32 qid_base)
7028 {
7029 	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7030 	u64 glbl_sts_addr, arb_err_addr;
7031 	char reg_desc[32];
7032 
7033 	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7034 	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7035 
7036 	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7037 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7038 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7039 
7040 		if (!glbl_sts_val)
7041 			continue;
7042 
7043 		if (i == QMAN_STREAMS) {
7044 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7045 			num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7046 		} else {
7047 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7048 			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7049 		}
7050 
7051 		for (j = 0 ; j < num_error_causes ; j++)
7052 			if (glbl_sts_val & BIT(j)) {
7053 				gaudi2_print_event(hdev, event_type, true,
7054 					"%s. err cause: %s", reg_desc,
7055 					i == QMAN_STREAMS ?
7056 					gaudi2_qman_lower_cp_error_cause[j] :
7057 					gaudi2_qman_error_cause[j]);
7058 				error_count++;
7059 			}
7060 
7061 		print_qman_data_on_err(hdev, qid_base, i, qman_base);
7062 	}
7063 
7064 	arb_err_val = RREG32(arb_err_addr);
7065 
7066 	if (!arb_err_val)
7067 		goto out;
7068 
7069 	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7070 		if (arb_err_val & BIT(j)) {
7071 			gaudi2_print_event(hdev, event_type, true,
7072 				"ARB_ERR. err cause: %s",
7073 				gaudi2_qman_arb_error_cause[j]);
7074 			error_count++;
7075 		}
7076 	}
7077 
7078 out:
7079 	return error_count;
7080 }
7081 
7082 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7083 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7084 			enum gaudi2_engine_id id, u64 *event_mask)
7085 {
7086 	u32 razwi_hi, razwi_lo, razwi_xy;
7087 	u16 eng_id = id;
7088 	u8 rd_wr_flag;
7089 
7090 	if (is_write) {
7091 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7092 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7093 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7094 		rd_wr_flag = HL_RAZWI_WRITE;
7095 	} else {
7096 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7097 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7098 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7099 		rd_wr_flag = HL_RAZWI_READ;
7100 	}
7101 
7102 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7103 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7104 
7105 	dev_err_ratelimited(hdev->dev,
7106 		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7107 		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7108 }
7109 
7110 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7111 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7112 			enum gaudi2_engine_id id, u64 *event_mask)
7113 {
7114 	u32 razwi_addr, razwi_xy;
7115 	u16 eng_id = id;
7116 	u8 rd_wr_flag;
7117 
7118 	if (is_write) {
7119 		razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7120 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7121 		rd_wr_flag = HL_RAZWI_WRITE;
7122 	} else {
7123 		razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7124 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7125 		rd_wr_flag = HL_RAZWI_READ;
7126 	}
7127 
7128 	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7129 	dev_err_ratelimited(hdev->dev,
7130 				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n",
7131 				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7132 						razwi_xy);
7133 }
7134 
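/* Translate a RAZWI initiator (module type + index) to its GAUDI2 engine ID */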
7135 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7136 						enum razwi_event_sources module, u8 module_idx)
7137 {
7138 	switch (module) {
7139 	case RAZWI_TPC:
7140 		if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7141 			return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7142 		return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7143 				(module_idx % NUM_OF_TPC_PER_DCORE) +
7144 				(GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7145 
7146 	case RAZWI_MME:
7147 		return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
7148 			(module_idx * ENGINE_ID_DCORE_OFFSET));
7149 
7150 	case RAZWI_EDMA:
7151 		return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7152 			(module_idx % NUM_OF_EDMA_PER_DCORE));
7153 
7154 	case RAZWI_PDMA:
7155 		return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
7156 
7157 	case RAZWI_NIC:
7158 		return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
7159 
7160 	case RAZWI_DEC:
7161 		if (module_idx == 8)
7162 			return GAUDI2_PCIE_ENGINE_ID_DEC_0;
7163 
7164 		if (module_idx == 9)
7165 			return GAUDI2_PCIE_ENGINE_ID_DEC_1;
7166 
7167 		return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7168 				(module_idx % NUM_OF_DEC_PER_DCORE) +
7169 				(GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7170 
7171 	case RAZWI_ROT:
7172 		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
7173 
7174 	default:
7175 		return GAUDI2_ENGINE_ID_SIZE;
7176 	}
7177 }
7178 
7179 /*
7180  * This function handles RR (range register) hit events that are raised by
7181  * the initiators themselves, as opposed to PSOC RAZWI indications.
7182  */
7183 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
7184 				enum razwi_event_sources module, u8 module_idx,
7185 				u8 module_sub_idx, u64 *event_mask)
7186 {
7187 	bool via_sft = false;
7188 	u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id;
7189 	u64 rtr_mstr_if_base_addr;
7190 	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
7191 	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
7192 	char initiator_name[64];
7193 
7194 	switch (module) {
7195 	case RAZWI_TPC:
7196 		rtr_id = gaudi2_tpc_initiator_rtr_id[module_idx];
7197 		sprintf(initiator_name, "TPC_%u", module_idx);
7198 		break;
7199 	case RAZWI_MME:
7200 		sprintf(initiator_name, "MME_%u", module_idx);
7201 		switch (module_sub_idx) {
7202 		case MME_WAP0:
7203 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
7204 			break;
7205 		case MME_WAP1:
7206 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
7207 			break;
7208 		case MME_WRITE:
7209 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
7210 			break;
7211 		case MME_READ:
7212 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
7213 			break;
7214 		case MME_SBTE0:
7215 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
7216 			break;
7217 		case MME_SBTE1:
7218 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
7219 			break;
7220 		case MME_SBTE2:
7221 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
7222 			break;
7223 		case MME_SBTE3:
7224 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
7225 			break;
7226 		case MME_SBTE4:
7227 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
7228 			break;
7229 		default:
7230 			return;
7231 		}
7232 		break;
7233 	case RAZWI_EDMA:
7234 		sft_id = gaudi2_edma_initiator_sft_id[module_idx].interface_id;
7235 		dcore_id = gaudi2_edma_initiator_sft_id[module_idx].dcore_id;
7236 		via_sft = true;
7237 		sprintf(initiator_name, "EDMA_%u", module_idx);
7238 		break;
7239 	case RAZWI_PDMA:
7240 		rtr_id = gaudi2_pdma_initiator_rtr_id[module_idx];
7241 		sprintf(initiator_name, "PDMA_%u", module_idx);
7242 		break;
7243 	case RAZWI_NIC:
7244 		rtr_id = gaudi2_nic_initiator_rtr_id[module_idx];
7245 		sprintf(initiator_name, "NIC_%u", module_idx);
7246 		break;
7247 	case RAZWI_DEC:
7248 		rtr_id = gaudi2_dec_initiator_rtr_id[module_idx];
7249 		sprintf(initiator_name, "DEC_%u", module_idx);
7250 		break;
7251 	case RAZWI_ROT:
7252 		rtr_id = gaudi2_rot_initiator_rtr_id[module_idx];
7253 		sprintf(initiator_name, "ROT_%u", module_idx);
7254 		break;
7255 	default:
7256 		return;
7257 	}
7258 
7259 	/* Find router mstr_if register base */
7260 	if (via_sft) {
7261 		rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE +
7262 				dcore_id * SFT_DCORE_OFFSET +
7263 				sft_id * SFT_IF_OFFSET +
7264 				RTR_MSTR_IF_OFFSET;
7265 	} else {
7266 		dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7267 		dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7268 		rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
7269 				dcore_id * DCORE_OFFSET +
7270 				dcore_rtr_id * DCORE_RTR_OFFSET +
7271 				RTR_MSTR_IF_OFFSET;
7272 	}
7273 
7274 	/* Find out event cause by reading "RAZWI_HAPPENED" registers */
7275 	hbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
7276 	hbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
7277 
7278 	if (via_sft) {
7279 		/* The SFT has a separate MSTR_IF for LBW; the LBW RAZWI related
7280 		 * registers can only be read there
7281 		 */
7282 		u64 base;
7283 
7284 		base = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + dcore_id * SFT_DCORE_OFFSET +
7285 				RTR_LBW_MSTR_IF_OFFSET;
7286 
7287 		lbw_shrd_aw = RREG32(base + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7288 		lbw_shrd_ar = RREG32(base + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7289 	} else {
7290 		lbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7291 		lbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7292 	}
7293 
7294 	eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
7295 	if (hbw_shrd_aw) {
7296 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7297 						initiator_name, eng_id, event_mask);
7298 
7299 		/* Clear event indication */
7300 		WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
7301 	}
7302 
7303 	if (hbw_shrd_ar) {
7304 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7305 						initiator_name, eng_id, event_mask);
7306 
7307 		/* Clear event indication */
7308 		WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
7309 	}
7310 
7311 	if (lbw_shrd_aw) {
7312 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7313 						initiator_name, eng_id, event_mask);
7314 
7315 		/* Clear event indication */
7316 		WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
7317 	}
7318 
7319 	if (lbw_shrd_ar) {
7320 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7321 						initiator_name, eng_id, event_mask);
7322 
7323 		/* Clear event indication */
7324 		WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
7325 	}
7326 }
7327 
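/*
 * Scan all possible RAZWI initiators (TPC, MME, EDMA, PDMA, NIC, DEC and ROT)
 * for latched "RAZWI happened" indications and report them.
 */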
7328 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
7329 {
7330 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7331 	u8 mod_idx, sub_mod;
7332 
7333 	/* check all TPCs */
7334 	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
7335 		if (prop->tpc_enabled_mask & BIT(mod_idx))
7336 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
7337 	}
7338 
7339 	/* check all MMEs */
7340 	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7341 		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
7342 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
7343 									sub_mod, NULL);
7344 
7345 	/* check all EDMAs */
7346 	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7347 		if (prop->edma_enabled_mask & BIT(mod_idx))
7348 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
7349 
7350 	/* check all PDMAs */
7351 	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
7352 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
7353 
7354 	/* check all NICs */
7355 	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
7356 		if (hdev->nic_ports_mask & BIT(mod_idx))
7357 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
7358 								NULL);
7359 
7360 	/* check all DECs */
7361 	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
7362 		if (prop->decoder_enabled_mask & BIT(mod_idx))
7363 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
7364 
7365 	/* check all ROTs */
7366 	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
7367 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
7368 }
7369 
7370 static const char *gaudi2_get_initiators_name(u32 rtr_id)
7371 {
7372 	switch (rtr_id) {
7373 	case DCORE0_RTR0:
7374 		return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU";
7375 	case DCORE0_RTR1:
7376 		return "TPC0/1";
7377 	case DCORE0_RTR2:
7378 		return "TPC2/3";
7379 	case DCORE0_RTR3:
7380 		return "TPC4/5";
7381 	case DCORE0_RTR4:
7382 		return "MME0_SBTE0/1";
7383 	case DCORE0_RTR5:
7384 		return "MME0_WAP0/SBTE2";
7385 	case DCORE0_RTR6:
7386 		return "MME0_CTRL_WR/SBTE3";
7387 	case DCORE0_RTR7:
7388 		return "MME0_WAP1/CTRL_RD/SBTE4";
7389 	case DCORE1_RTR0:
7390 		return "MME1_WAP1/CTRL_RD/SBTE4";
7391 	case DCORE1_RTR1:
7392 		return "MME1_CTRL_WR/SBTE3";
7393 	case DCORE1_RTR2:
7394 		return "MME1_WAP0/SBTE2";
7395 	case DCORE1_RTR3:
7396 		return "MME1_SBTE0/1";
7397 	case DCORE1_RTR4:
7398 		return "TPC10/11";
7399 	case DCORE1_RTR5:
7400 		return "TPC8/9";
7401 	case DCORE1_RTR6:
7402 		return "TPC6/7";
7403 	case DCORE1_RTR7:
7404 		return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7";
7405 	case DCORE2_RTR0:
7406 		return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0";
7407 	case DCORE2_RTR1:
7408 		return "TPC16/17";
7409 	case DCORE2_RTR2:
7410 		return "TPC14/15";
7411 	case DCORE2_RTR3:
7412 		return "TPC12/13";
7413 	case DCORE2_RTR4:
7414 		return "MME2_SBTE0/1";
7415 	case DCORE2_RTR5:
7416 		return "MME2_WAP0/SBTE2";
7417 	case DCORE2_RTR6:
7418 		return "MME2_CTRL_WR/SBTE3";
7419 	case DCORE2_RTR7:
7420 		return "MME2_WAP1/CTRL_RD/SBTE4";
7421 	case DCORE3_RTR0:
7422 		return "MME3_WAP1/CTRL_RD/SBTE4";
7423 	case DCORE3_RTR1:
7424 		return "MME3_CTRL_WR/SBTE3";
7425 	case DCORE3_RTR2:
7426 		return "MME3_WAP0/SBTE2";
7427 	case DCORE3_RTR3:
7428 		return "MME3_SBTE0/1";
7429 	case DCORE3_RTR4:
7430 		return "TPC18/19";
7431 	case DCORE3_RTR5:
7432 		return "TPC20/21";
7433 	case DCORE3_RTR6:
7434 		return "TPC22/23";
7435 	case DCORE3_RTR7:
7436 		return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC";
7437 	default:
7438 		return "N/A";
7439 	}
7440 }
7441 
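/*
 * Fill 'engines' with the ids of the engines that initiate traffic through the
 * given router and return the number of entries that were written.
 */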
7442 static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines)
7443 {
7444 	switch (rtr_id) {
7445 	case DCORE0_RTR0:
7446 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0;
7447 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1;
7448 		engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0;
7449 		engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1;
7450 		engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7451 		engines[5] = GAUDI2_ENGINE_ID_PDMA_0;
7452 		engines[6] = GAUDI2_ENGINE_ID_PDMA_1;
7453 		engines[7] = GAUDI2_ENGINE_ID_PCIE;
7454 		engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
7455 		engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
7456 		engines[10] = GAUDI2_ENGINE_ID_PSOC;
7457 		return 11;
7458 
7459 	case DCORE0_RTR1:
7460 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0;
7461 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1;
7462 		return 2;
7463 
7464 	case DCORE0_RTR2:
7465 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2;
7466 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3;
7467 		return 2;
7468 
7469 	case DCORE0_RTR3:
7470 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4;
7471 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5;
7472 		return 2;
7473 
7474 	case DCORE0_RTR4:
7475 	case DCORE0_RTR5:
7476 	case DCORE0_RTR6:
7477 	case DCORE0_RTR7:
7478 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME;
7479 		return 1;
7480 
7481 	case DCORE1_RTR0:
7482 	case DCORE1_RTR1:
7483 	case DCORE1_RTR2:
7484 	case DCORE1_RTR3:
7485 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME;
7486 		return 1;
7487 
7488 	case DCORE1_RTR4:
7489 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4;
7490 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5;
7491 		return 2;
7492 
7493 	case DCORE1_RTR5:
7494 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2;
7495 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3;
7496 		return 2;
7497 
7498 	case DCORE1_RTR6:
7499 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0;
7500 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1;
7501 		return 2;
7502 
7503 	case DCORE1_RTR7:
7504 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0;
7505 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1;
7506 		engines[2] = GAUDI2_ENGINE_ID_NIC0_0;
7507 		engines[3] = GAUDI2_ENGINE_ID_NIC1_0;
7508 		engines[4] = GAUDI2_ENGINE_ID_NIC2_0;
7509 		engines[5] = GAUDI2_ENGINE_ID_NIC3_0;
7510 		engines[6] = GAUDI2_ENGINE_ID_NIC4_0;
7511 		engines[7] = GAUDI2_ENGINE_ID_ARC_FARM;
7512 		engines[8] = GAUDI2_ENGINE_ID_KDMA;
7513 		engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
7514 		engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
7515 		return 11;
7516 
7517 	case DCORE2_RTR0:
7518 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0;
7519 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1;
7520 		engines[2] = GAUDI2_ENGINE_ID_NIC5_0;
7521 		engines[3] = GAUDI2_ENGINE_ID_NIC6_0;
7522 		engines[4] = GAUDI2_ENGINE_ID_NIC7_0;
7523 		engines[5] = GAUDI2_ENGINE_ID_NIC8_0;
7524 		engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
7525 		engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
7526 		engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7527 		return 9;
7528 
7529 	case DCORE2_RTR1:
7530 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4;
7531 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5;
7532 		return 2;
7533 
7534 	case DCORE2_RTR2:
7535 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2;
7536 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3;
7537 		return 2;
7538 
7539 	case DCORE2_RTR3:
7540 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0;
7541 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1;
7542 		return 2;
7543 
7544 	case DCORE2_RTR4:
7545 	case DCORE2_RTR5:
7546 	case DCORE2_RTR6:
7547 	case DCORE2_RTR7:
7548 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME;
7549 		return 1;
7550 	case DCORE3_RTR0:
7551 	case DCORE3_RTR1:
7552 	case DCORE3_RTR2:
7553 	case DCORE3_RTR3:
7554 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME;
7555 		return 1;
7556 	case DCORE3_RTR4:
7557 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0;
7558 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1;
7559 		return 2;
7560 	case DCORE3_RTR5:
7561 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2;
7562 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3;
7563 		return 2;
7564 	case DCORE3_RTR6:
7565 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4;
7566 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5;
7567 		return 2;
7568 	case DCORE3_RTR7:
7569 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0;
7570 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1;
7571 		engines[2] = GAUDI2_ENGINE_ID_NIC9_0;
7572 		engines[3] = GAUDI2_ENGINE_ID_NIC10_0;
7573 		engines[4] = GAUDI2_ENGINE_ID_NIC11_0;
7574 		engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
7575 		engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
7576 		engines[7] = GAUDI2_ENGINE_ID_ROT_1;
7577 		engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7578 		return 9;
7579 	default:
7580 		return 0;
7581 	}
7582 }
7583 
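/*
 * Report an unmapped-address HBW RAZWI: read the captured address from the
 * router control block, notify the error-info layer and clear the indication.
 */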
7584 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7585 							u64 rtr_ctrl_base_addr, bool is_write,
7586 							u64 *event_mask)
7587 {
7588 	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7589 	u32 razwi_hi, razwi_lo;
7590 	u8 rd_wr_flag;
7591 
7592 	num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7593 
7594 	if (is_write) {
7595 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI);
7596 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO);
7597 		rd_wr_flag = HL_RAZWI_WRITE;
7598 
7599 		/* Clear set indication */
7600 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1);
7601 	} else {
7602 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI);
7603 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO);
7604 		rd_wr_flag = HL_RAZWI_READ;
7605 
7606 		/* Clear set indication */
7607 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
7608 	}
7609 
7610 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng,
7611 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7612 	dev_err_ratelimited(hdev->dev,
7613 		"RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
7614 		is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
7615 
7616 	dev_err_ratelimited(hdev->dev,
7617 		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7618 }
7619 
7620 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7621 							u64 rtr_ctrl_base_addr, bool is_write,
7622 							u64 *event_mask)
7623 {
7624 	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7625 	u32 razwi_addr;
7626 	u8 rd_wr_flag;
7627 
7628 	num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7629 
7630 	if (is_write) {
7631 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
7632 		rd_wr_flag = HL_RAZWI_WRITE;
7633 
7634 		/* Clear set indication */
7635 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
7636 	} else {
7637 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
7638 		rd_wr_flag = HL_RAZWI_READ;
7639 
7640 		/* Clear set indication */
7641 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
7642 	}
7643 
7644 	hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW,
7645 			event_mask);
7646 	dev_err_ratelimited(hdev->dev,
7647 		"RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n",
7648 		is_write ? "WR" : "RD", rtr_id, razwi_addr);
7649 
7650 	dev_err_ratelimited(hdev->dev,
7651 		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7652 }
7653 
7654 /* A PSOC RAZWI interrupt occurs only upon an access to a bad address */
7655 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
7656 {
7657 	u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy,
7658 						razwi_mask_info, razwi_intr = 0, error_count = 0;
7659 	int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES;
7660 	u64 rtr_ctrl_base_addr;
7661 
7662 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
7663 		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
7664 		if (!razwi_intr)
7665 			return 0;
7666 	}
7667 
7668 	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
7669 	xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info);
7670 
7671 	dev_err_ratelimited(hdev->dev,
7672 		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
7673 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
7674 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
7675 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
7676 		xy,
7677 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
7678 
7679 	if (xy == 0) {
7680 		dev_err_ratelimited(hdev->dev,
7681 				"PSOC RAZWI interrupt: received event from 0 rtr coordinates\n");
7682 		goto clear;
7683 	}
7684 
7685 	/* Find router id by router coordinates */
7686 	for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++)
7687 		if (rtr_coordinates_to_rtr_id[rtr_id] == xy)
7688 			break;
7689 
7690 	if (rtr_id == rtr_map_arr_len) {
7691 		dev_err_ratelimited(hdev->dev,
7692 				"PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy);
7693 		goto clear;
7694 	}
7695 
7696 	/* Find router mstr_if register base */
7697 	dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7698 	dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7699 	rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET +
7700 				dcore_rtr_id * DCORE_RTR_OFFSET;
7701 
7702 	hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET);
7703 	hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET);
7704 	lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET);
7705 	lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET);
7706 
7707 	if (hbw_aw_set)
7708 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7709 						rtr_ctrl_base_addr, true, event_mask);
7710 
7711 	if (hbw_ar_set)
7712 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7713 						rtr_ctrl_base_addr, false, event_mask);
7714 
7715 	if (lbw_aw_set)
7716 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7717 						rtr_ctrl_base_addr, true, event_mask);
7718 
7719 	if (lbw_ar_set)
7720 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7721 						rtr_ctrl_base_addr, false, event_mask);
7722 
7723 	error_count++;
7724 
7725 clear:
7726 	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
7727 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
7728 		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
7729 
7730 	return error_count;
7731 }
7732 
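/* Decode and clear the SEI status bits of a single QMAN block */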
7733 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
7734 {
7735 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
7736 
7737 	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
7738 
7739 	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
7740 		if (sts_val & BIT(i)) {
7741 			gaudi2_print_event(hdev, event_type, true,
7742 				"err cause: %s", gaudi2_qm_sei_error_cause[i]);
7743 			sts_clr_val |= BIT(i);
7744 			error_count++;
7745 		}
7746 	}
7747 
7748 	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
7749 
7750 	return error_count;
7751 }
7752 
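/*
 * Resolve the QMAN base address from the AXI error response event type and
 * handle its SEI status. When extended_err_check is set, also acknowledge any
 * RAZWI that was latched for the module.
 */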
7753 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
7754 					bool extended_err_check, u64 *event_mask)
7755 {
7756 	enum razwi_event_sources module;
7757 	u32 error_count = 0;
7758 	u64 qman_base;
7759 	u8 index;
7760 
7761 	switch (event_type) {
7762 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
7763 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
7764 		qman_base = mmDCORE0_TPC0_QM_BASE +
7765 				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
7766 				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
7767 		module = RAZWI_TPC;
7768 		break;
7769 	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
7770 		qman_base = mmDCORE0_TPC6_QM_BASE;
7771 		module = RAZWI_TPC;
7772 		break;
7773 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
7774 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
7775 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
7776 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
7777 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
7778 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
7779 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
7780 		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
7781 		module = RAZWI_MME;
7782 		break;
7783 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
7784 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
7785 		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
7786 		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
7787 		module = RAZWI_PDMA;
7788 		break;
7789 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
7790 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
7791 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
7792 		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
7793 		module = RAZWI_ROT;
7794 		break;
7795 	default:
7796 		return 0;
7797 	}
7798 
7799 	error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
7800 
7801 	/* There is a single event per NIC macro, so we should check both of its QMAN blocks */
7802 	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
7803 			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
7804 		error_count += _gaudi2_handle_qm_sei_err(hdev,
7805 					qman_base + NIC_QM_OFFSET, event_type);
7806 
7807 	if (extended_err_check)
7808 		/* check if RAZWI happened */
7809 		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
7810 
7811 	return error_count;
7812 }
7813 
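/* Map a QM error event to its queue-id base and QMAN block and handle the error causes */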
7814 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type)
7815 {
7816 	u32 qid_base, error_count = 0;
7817 	u64 qman_base;
7818 	u8 index;
7819 
7820 	switch (event_type) {
7821 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
7822 		index = event_type - GAUDI2_EVENT_TPC0_QM;
7823 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
7824 		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7825 		break;
7826 	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
7827 		index = event_type - GAUDI2_EVENT_TPC6_QM;
7828 		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
7829 		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7830 		break;
7831 	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
7832 		index = event_type - GAUDI2_EVENT_TPC12_QM;
7833 		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
7834 		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7835 		break;
7836 	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
7837 		index = event_type - GAUDI2_EVENT_TPC18_QM;
7838 		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
7839 		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7840 		break;
7841 	case GAUDI2_EVENT_TPC24_QM:
7842 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
7843 		qman_base = mmDCORE0_TPC6_QM_BASE;
7844 		break;
7845 	case GAUDI2_EVENT_MME0_QM:
7846 		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
7847 		qman_base = mmDCORE0_MME_QM_BASE;
7848 		break;
7849 	case GAUDI2_EVENT_MME1_QM:
7850 		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
7851 		qman_base = mmDCORE1_MME_QM_BASE;
7852 		break;
7853 	case GAUDI2_EVENT_MME2_QM:
7854 		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
7855 		qman_base = mmDCORE2_MME_QM_BASE;
7856 		break;
7857 	case GAUDI2_EVENT_MME3_QM:
7858 		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
7859 		qman_base = mmDCORE3_MME_QM_BASE;
7860 		break;
7861 	case GAUDI2_EVENT_HDMA0_QM:
7862 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
7863 		qman_base = mmDCORE0_EDMA0_QM_BASE;
7864 		break;
7865 	case GAUDI2_EVENT_HDMA1_QM:
7866 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
7867 		qman_base = mmDCORE0_EDMA1_QM_BASE;
7868 		break;
7869 	case GAUDI2_EVENT_HDMA2_QM:
7870 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
7871 		qman_base = mmDCORE1_EDMA0_QM_BASE;
7872 		break;
7873 	case GAUDI2_EVENT_HDMA3_QM:
7874 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
7875 		qman_base = mmDCORE1_EDMA1_QM_BASE;
7876 		break;
7877 	case GAUDI2_EVENT_HDMA4_QM:
7878 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
7879 		qman_base = mmDCORE2_EDMA0_QM_BASE;
7880 		break;
7881 	case GAUDI2_EVENT_HDMA5_QM:
7882 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
7883 		qman_base = mmDCORE2_EDMA1_QM_BASE;
7884 		break;
7885 	case GAUDI2_EVENT_HDMA6_QM:
7886 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
7887 		qman_base = mmDCORE3_EDMA0_QM_BASE;
7888 		break;
7889 	case GAUDI2_EVENT_HDMA7_QM:
7890 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
7891 		qman_base = mmDCORE3_EDMA1_QM_BASE;
7892 		break;
7893 	case GAUDI2_EVENT_PDMA0_QM:
7894 		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
7895 		qman_base = mmPDMA0_QM_BASE;
7896 		break;
7897 	case GAUDI2_EVENT_PDMA1_QM:
7898 		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
7899 		qman_base = mmPDMA1_QM_BASE;
7900 		break;
7901 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
7902 		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
7903 		qman_base = mmROT0_QM_BASE;
7904 		break;
7905 	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
7906 		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
7907 		qman_base = mmROT1_QM_BASE;
7908 		break;
7909 	default:
7910 		return 0;
7911 	}
7912 
7913 	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
7914 
7915 	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
7916 	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM)
7917 		error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
7918 
7919 	return error_count;
7920 }
7921 
7922 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
7923 {
7924 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
7925 
7926 	sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS);
7927 
7928 	for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
7929 		if (sts_val & BIT(i)) {
7930 			gaudi2_print_event(hdev, event_type, true,
7931 				"err cause: %s", gaudi2_arc_sei_error_cause[i]);
7932 			sts_clr_val |= BIT(i);
7933 			error_count++;
7934 		}
7935 	}
7936 
7937 	WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val);
7938 
7939 	return error_count;
7940 }
7941 
7942 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
7943 {
7944 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
7945 
7946 	sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
7947 
7948 	for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
7949 		if (sts_val & BIT(i)) {
7950 			gaudi2_print_event(hdev, event_type, true,
7951 				"err cause: %s", gaudi2_cpu_sei_error_cause[i]);
7952 			sts_clr_val |= BIT(i);
7953 			error_count++;
7954 		}
7955 	}
7956 
7957 	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
7958 
7959 	return error_count;
7960 }
7961 
7962 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
7963 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
7964 					u64 *event_mask)
7965 {
7966 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
7967 	u32 error_count = 0;
7968 	int i;
7969 
7970 	for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
7971 		if (intr_cause_data & BIT(i)) {
7972 			gaudi2_print_event(hdev, event_type, true,
7973 				"err cause: %s", guadi2_rot_error_cause[i]);
7974 			error_count++;
7975 		}
7976 
7977 	/* check if RAZWI happened */
7978 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
7979 
7980 	return error_count;
7981 }
7982 
7983 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type,
7984 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
7985 					u64 *event_mask)
7986 {
7987 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
7988 	u32 error_count = 0;
7989 	int i;
7990 
7991 	for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
7992 		if (intr_cause_data & BIT(i)) {
7993 			gaudi2_print_event(hdev, event_type, true,
7994 				"interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]);
7995 			error_count++;
7996 		}
7997 
7998 	/* check if RAZWI happened */
7999 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
8000 
8001 	return error_count;
8002 }
8003 
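/*
 * Handle a decoder interrupt: print the latched cause bits, check for a
 * related RAZWI and clear the cause register (write-1-to-clear).
 */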
8004 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8005 					u64 *event_mask)
8006 {
8007 	u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8008 	int i;
8009 
8010 	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8011 		/* DCORE DEC */
8012 		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8013 				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8014 				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8015 	else
8016 		/* PCIE DEC */
8017 		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8018 				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8019 
8020 	sts_val = RREG32(sts_addr);
8021 
8022 	for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8023 		if (sts_val & BIT(i)) {
8024 			gaudi2_print_event(hdev, event_type, true,
8025 				"err cause: %s", gaudi2_dec_error_cause[i]);
8026 			sts_clr_val |= BIT(i);
8027 			error_count++;
8028 		}
8029 	}
8030 
8031 	/* check if RAZWI happened */
8032 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
8033 
8034 	/* Write 1 to clear the errors */
8035 	WREG32(sts_addr, sts_clr_val);
8036 
8037 	return error_count;
8038 }
8039 
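/*
 * Handle an MME control error: print the latched cause bits, check every MME
 * initiator for a RAZWI and clear the interrupt.
 */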
8040 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8041 					u64 *event_mask)
8042 {
8043 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8044 	int i;
8045 
8046 	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8047 	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8048 
8049 	sts_val = RREG32(sts_addr);
8050 
8051 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8052 		if (sts_val & BIT(i)) {
8053 			gaudi2_print_event(hdev, event_type, true,
8054 				"err cause: %s", guadi2_mme_error_cause[i]);
8055 			sts_clr_val |= BIT(i);
8056 			error_count++;
8057 		}
8058 	}
8059 
8060 	/* check if RAZWI happened */
8061 	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8062 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);
8063 
8064 	WREG32(sts_clr_addr, sts_clr_val);
8065 
8066 	return error_count;
8067 }
8068 
8069 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
8070 					u64 intr_cause_data)
8071 {
8072 	int i, error_count = 0;
8073 
8074 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8075 		if (intr_cause_data & BIT(i)) {
8076 			gaudi2_print_event(hdev, event_type, true,
8077 				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
8078 			error_count++;
8079 		}
8080 
8081 	return error_count;
8082 }
8083 
8084 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8085 					u64 *event_mask)
8086 {
8087 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8088 	int i;
8089 
8090 	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8091 	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8092 
8093 	sts_val = RREG32(sts_addr);
8094 
8095 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8096 		if (sts_val & BIT(i)) {
8097 			gaudi2_print_event(hdev, event_type, true,
8098 				"err cause: %s", guadi2_mme_wap_error_cause[i]);
8099 			sts_clr_val |= BIT(i);
8100 			error_count++;
8101 		}
8102 	}
8103 
8104 	/* check if RAZWI happened on WAP0/1 */
8105 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
8106 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
8107 
8108 	WREG32(sts_clr_addr, sts_clr_val);
8109 
8110 	return error_count;
8111 }
8112 
8113 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8114 					u64 intr_cause_data)
8115 {
8116 	u32 error_count = 0;
8117 	int i;
8118 
8119 	/* If an AXI read or write error is received, an error is reported and an
8120 	 * interrupt message is sent. Due to a HW erratum, when reading the cause
8121 	 * register of the KDMA engine, the reported error is always HBW even if
8122 	 * the actual error was caused by an LBW KDMA transaction.
8123 	 */
8124 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8125 		if (intr_cause_data & BIT(i)) {
8126 			gaudi2_print_event(hdev, event_type, true,
8127 				"err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8128 			error_count++;
8129 		}
8130 
8131 	return error_count;
8132 }
8133 
8134 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type,
8135 					u64 intr_cause_data)
8136 {
8137 	u32 error_count = 0;
8138 	int i;
8139 
8140 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8141 		if (intr_cause_data & BIT(i)) {
8142 			gaudi2_print_event(hdev, event_type, true,
8143 				"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8144 			error_count++;
8145 		}
8146 
8147 	return error_count;
8148 }
8149 
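/*
 * Check the PCIE master RR shared interface for latched HBW/LBW RAZWI
 * indications, report them and clear each RAZWI_HAPPENED bit.
 */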
8150 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8151 {
8152 	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8153 
8154 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8155 	if (RREG32(razwi_happened_addr)) {
8156 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8157 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8158 		WREG32(razwi_happened_addr, 0x1);
8159 	}
8160 
8161 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8162 	if (RREG32(razwi_happened_addr)) {
8163 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8164 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8165 		WREG32(razwi_happened_addr, 0x1);
8166 	}
8167 
8168 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8169 	if (RREG32(razwi_happened_addr)) {
8170 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8171 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8172 		WREG32(razwi_happened_addr, 0x1);
8173 	}
8174 
8175 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8176 	if (RREG32(razwi_happened_addr)) {
8177 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8178 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8179 		WREG32(razwi_happened_addr, 0x1);
8180 	}
8181 }
8182 
8183 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8184 					u64 intr_cause_data, u64 *event_mask)
8185 {
8186 	u32 error_count = 0;
8187 	int i;
8188 
8189 	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8190 		if (!(intr_cause_data & BIT_ULL(i)))
8191 			continue;
8192 
8193 		gaudi2_print_event(hdev, event_type, true,
8194 			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8195 		error_count++;
8196 
8197 		switch (intr_cause_data & BIT_ULL(i)) {
8198 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8199 			break;
8200 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8201 			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8202 			break;
8203 		}
8204 	}
8205 
8206 	return error_count;
8207 }
8208 
8209 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8210 				u64 intr_cause_data)
8212 {
8213 	u32 error_count = 0;
8214 	int i;
8215 
8216 	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8217 		if (intr_cause_data & BIT_ULL(i)) {
8218 			gaudi2_print_event(hdev, event_type, true,
8219 				"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8220 			error_count++;
8221 		}
8222 	}
8223 
8224 	return error_count;
8225 }
8226 
8227 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8228 {
8229 	u32 error_count = 0;
8230 	int i;
8231 
8232 	for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8233 		if (intr_cause_data & BIT_ULL(i)) {
8234 			gaudi2_print_event(hdev, event_type, true,
8235 				"err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8236 			error_count++;
8237 		}
8238 	}
8239 
8240 	return error_count;
8241 }
8242 
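/*
 * If the MMU captured a page error, reconstruct the faulting VA from the
 * capture registers, report the page fault and clear the capture.
 */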
8243 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8244 					u64 *event_mask)
8245 {
8246 	u32 valid, val;
8247 	u64 addr;
8248 
8249 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8250 
8251 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8252 		return;
8253 
8254 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8255 	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8256 	addr <<= 32;
8257 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8258 
8259 	dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
8260 				is_pmmu ? "PMMU" : "HMMU", addr);
8261 	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8262 
8263 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0);
8264 }
8265 
8266 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8267 {
8268 	u32 valid, val;
8269 	u64 addr;
8270 
8271 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8272 
8273 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8274 		return;
8275 
8276 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8277 	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8278 	addr <<= 32;
8279 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8280 
8281 	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8282 				is_pmmu ? "PMMU" : "HMMU", addr);
8283 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0);
8284 }
8285 
8286 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
8287 						u64 mmu_base, bool is_pmmu, u64 *event_mask)
8288 {
8289 	u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
8290 	int i;
8291 
8292 	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8293 
8294 	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8295 		if (spi_sei_cause & BIT(i)) {
8296 			gaudi2_print_event(hdev, event_type, true,
8297 				"err cause: %s", gaudi2_mmu_spi_sei[i].cause);
8298 
8299 			if (i == 0)
8300 				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
8301 			else if (i == 1)
8302 				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8303 
8304 			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8305 				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8306 
8307 			error_count++;
8308 		}
8309 	}
8310 
8311 	/* Clear cause */
8312 	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8313 
8314 	/* Clear interrupt */
8315 	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8316 
8317 	return error_count;
8318 }
8319 
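/*
 * Handle a sync manager error: decode the SM_SEI_CAUSE bits and any pending
 * CQ interrupt of the given sync manager, then clear both indications.
 */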
8320 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
8321 {
8322 	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
8323 		cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
8324 	int i;
8325 
8326 	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
8327 	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
8328 
8329 	sei_cause_val = RREG32(sei_cause_addr);
8330 	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
8331 	cq_intr_val = RREG32(cq_intr_addr);
8332 
8333 	/* SEI interrupt */
8334 	if (sei_cause_cause) {
8335 		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
8336 		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
8337 					sei_cause_val);
8338 
8339 		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
8340 			if (!(sei_cause_cause & BIT(i)))
8341 				continue;
8342 
8343 			gaudi2_print_event(hdev, event_type, true,
8344 				"err cause: %s. %s: 0x%X\n",
8345 				gaudi2_sm_sei_cause[i].cause_name,
8346 				gaudi2_sm_sei_cause[i].log_name,
8347 				sei_cause_log);
8348 			error_count++;
8349 			break;
8350 		}
8351 
8352 		/* Clear SM_SEI_CAUSE */
8353 		WREG32(sei_cause_addr, 0);
8354 	}
8355 
8356 	/* CQ interrupt */
8357 	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
8358 		cq_intr_queue_index =
8359 				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
8360 					cq_intr_val);
8361 
8362 		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
8363 				sm_index, cq_intr_queue_index);
8364 		error_count++;
8365 
8366 		/* Clear CQ_INTR */
8367 		WREG32(cq_intr_addr, 0);
8368 	}
8369 
8370 	return error_count;
8371 }
8372 
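/* Resolve the HMMU/PMMU base address from the event type and handle its SPI/SEI causes */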
8373 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8374 {
8375 	bool is_pmmu = false;
8376 	u32 error_count = 0;
8377 	u64 mmu_base;
8378 	u8 index;
8379 
8380 	switch (event_type) {
8381 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
8382 		index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3;
8383 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8384 		break;
8385 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
8386 		index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP);
8387 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8388 		break;
8389 	case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
8390 		index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3;
8391 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8392 		break;
8393 	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
8394 		index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP);
8395 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8396 		break;
8397 	case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
8398 		index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3;
8399 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8400 		break;
8401 	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
8402 		index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP);
8403 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8404 		break;
8405 	case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8406 		index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3;
8407 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8408 		break;
8409 	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8410 		index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP);
8411 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8412 		break;
8413 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8414 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8415 		is_pmmu = true;
8416 		mmu_base = mmPMMU_HBW_MMU_BASE;
8417 		break;
8418 	default:
8419 		return 0;
8420 	}
8421 
8422 	error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
8423 							is_pmmu, event_mask);
8424 
8425 	return error_count;
8426 }
8427 
8429 /* Returns true if a hard reset is required (ECC DERR or read parity), false otherwise (ECC SERR) */
8430 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
8431 			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
8432 {
8433 	u32 addr, beat, beat_shift;
8434 	bool rc = false;
8435 
8436 	dev_err_ratelimited(hdev->dev,
8437 			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
8438 			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
8439 			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
8440 			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
8441 
8442 	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
8443 	dev_err_ratelimited(hdev->dev,
8444 			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
8445 			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
8446 			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
8447 			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
8448 			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
8449 			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
8450 
8451 	/* For each beat (RDQS edge), look for possible errors and print relevant info */
8452 	for (beat = 0 ; beat < 4 ; beat++) {
8453 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8454 			(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
8455 			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
8456 						beat,
8457 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8458 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8459 
8460 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8461 			(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
8462 			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
8463 						beat,
8464 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8465 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8466 			rc |= true;
8467 		}
8468 
8469 		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
8470 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8471 			(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
8472 			dev_err_ratelimited(hdev->dev,
8473 					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
8474 					beat,
8475 					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8476 					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8477 						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
8478 						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
8479 			rc |= true;
8480 		}
8481 
8482 		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
8483 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8484 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
8485 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8486 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
8487 	}
8488 
8489 	return rc;
8490 }
8491 
8492 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
8493 			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
8494 {
8495 	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
8496 	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
8497 
8498 	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
8499 
8500 	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
8501 				derr & 0x3, derr & 0xc);
8502 
8503 	/* JIRA H6-3286 - the following prints may not be valid */
8504 	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
8505 	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
8506 		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
8507 		dev_err_ratelimited(hdev->dev,
8508 				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
8509 				i,
8510 				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
8511 				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
8512 				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
8513 				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
8514 	}
8515 }
8516 
8517 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
8518 		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
8519 {
8520 	__le32 *col_cmd = ca_par_err_data->dbg_col;
8521 	__le16 *row_cmd = ca_par_err_data->dbg_row;
8522 	u32 i;
8523 
8524 	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
8525 
8526 	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
8527 	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
8528 		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
8529 			le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
8530 			le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
8531 }
8532 
8533 /* Returns true if a hard reset is needed or false otherwise */
8534 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
8535 					struct hl_eq_hbm_sei_data *sei_data)
8536 {
8537 	bool require_hard_reset = false;
8538 	u32 hbm_id, mc_id, cause_idx;
8539 
8540 	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
8541 	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
8542 
8543 	cause_idx = sei_data->hdr.sei_cause;
8544 	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
8545 		gaudi2_print_event(hdev, event_type, true,
8546 			"err cause: Invalid HBM SEI event cause (%d) provided by FW",
8547 			cause_idx);
8548 		return true;
8549 	}
8550 
8551 	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
8552 		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8553 		sei_data->hdr.is_critical ? "Critical" : "Non-critical",
8554 		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8555 		hbm_mc_sei_cause[cause_idx]);
8556 
8557 	/* Print error-specific info */
8558 	switch (cause_idx) {
8559 	case HBM_SEI_CATTRIP:
8560 		require_hard_reset = true;
8561 		break;
8562 
8563 	case HBM_SEI_CMD_PARITY_EVEN:
8564 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
8565 						le32_to_cpu(sei_data->hdr.cnt));
8566 		require_hard_reset = true;
8567 		break;
8568 
8569 	case HBM_SEI_CMD_PARITY_ODD:
8570 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
8571 						le32_to_cpu(sei_data->hdr.cnt));
8572 		require_hard_reset = true;
8573 		break;
8574 
8575 	case HBM_SEI_WRITE_DATA_PARITY_ERR:
8576 		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
8577 						le32_to_cpu(sei_data->hdr.cnt));
8578 		require_hard_reset = true;
8579 		break;
8580 
8581 	case HBM_SEI_READ_ERR:
8582 		/* Unlike other SEI events, read error requires further processing of the
8583 		 * raw data in order to determine the root cause.
8584 		 */
8585 		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
8586 								&sei_data->read_err_info,
8587 								le32_to_cpu(sei_data->hdr.cnt));
8588 		break;
8589 
8590 	default:
8591 		break;
8592 	}
8593 
8594 	require_hard_reset |= !!sei_data->hdr.is_critical;
8595 
8596 	return require_hard_reset;
8597 }
8598 
8599 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
8600 				u64 intr_cause_data)
8601 {
8602 	if (intr_cause_data) {
8603 		gaudi2_print_event(hdev, event_type, true,
8604 			"temperature error cause: %#llx", intr_cause_data);
8605 		return 1;
8606 	}
8607 
8608 	return 0;
8609 }
8610 
8611 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
8612 {
8613 	u32 i, error_count = 0;
8614 
8615 	for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
8616 		if (intr_cause_data & hbm_mc_spi[i].mask) {
8617 			dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
8618 				hbm_mc_spi[i].cause);
8619 			error_count++;
8620 		}
8621 
8622 	return error_count;
8623 }
8624 
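/*
 * Track the clock throttling state (power/thermal) and its timestamps
 * according to the start/end events reported by the firmware.
 */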
8625 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8626 {
8627 	ktime_t zero_time = ktime_set(0, 0);
8628 
8629 	mutex_lock(&hdev->clk_throttling.lock);
8630 
8631 	switch (event_type) {
8632 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
8633 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
8634 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
8635 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
8636 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
8637 		dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
8638 		break;
8639 
8640 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
8641 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
8642 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
8643 		dev_dbg_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
8644 		break;
8645 
8646 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
8647 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
8648 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
8649 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
8650 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
8651 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8652 		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
8653 		break;
8654 
8655 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
8656 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
8657 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
8658 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8659 		dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
8660 		break;
8661 
8662 	default:
8663 		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
8664 		break;
8665 	}
8666 
8667 	mutex_unlock(&hdev->clk_throttling.lock);
8668 }
8669 
8670 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
8671 					struct cpucp_pkt_sync_err *sync_err)
8672 {
8673 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8674 
8675 	gaudi2_print_event(hdev, event_type, false,
8676 		"FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
8677 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
8678 		q->pi, atomic_read(&q->ci));
8679 }
8680 
8681 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
8682 {
8683 	u32 p2p_intr, msix_gw_intr, error_count = 0;
8684 
8685 	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
8686 	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
8687 
8688 	if (p2p_intr) {
8689 		gaudi2_print_event(hdev, event_type, true,
8690 			"pcie p2p transaction terminated due to security, req_id(0x%x)\n",
8691 			RREG32(mmPCIE_WRAP_P2P_REQ_ID));
8692 
8693 		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
8694 		error_count++;
8695 	}
8696 
8697 	if (msix_gw_intr) {
8698 		gaudi2_print_event(hdev, event_type, true,
8699 			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n",
8700 			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
8701 
8702 		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
8703 		error_count++;
8704 	}
8705 
8706 	return error_count;
8707 }
8708 
8709 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
8710 			struct hl_eq_pcie_drain_ind_data *drain_data)
8711 {
8712 	u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
8713 
8714 	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
8715 	lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
8716 	lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
8717 	hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
8718 	hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
8719 
8720 	if (cause & BIT_ULL(0)) {
8721 		dev_err_ratelimited(hdev->dev,
8722 			"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
8723 			!!lbw_rd, !!lbw_wr);
8724 		error_count++;
8725 	}
8726 
8727 	if (cause & BIT_ULL(1)) {
8728 		dev_err_ratelimited(hdev->dev,
8729 			"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
8730 			hbw_rd, hbw_wr);
8731 		error_count++;
8732 	}
8733 
8734 	return error_count;
8735 }
8736 
8737 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
8738 {
8739 	u32 error_count = 0;
8740 	int i;
8741 
8742 	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
8743 		if (intr_cause_data & BIT_ULL(i)) {
8744 			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
8745 				gaudi2_psoc_axi_drain_interrupts_cause[i]);
8746 			error_count++;
8747 		}
8748 	}
8749 
8750 	return error_count;
8751 }
8752 
8753 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
8754 					struct cpucp_pkt_sync_err *sync_err)
8755 {
8756 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8757 
8758 	gaudi2_print_event(hdev, event_type, false,
8759 		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
8760 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
8761 }
8762 
8763 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
8764 					struct hl_eq_engine_arc_intr_data *data)
8765 {
8766 	struct hl_engine_arc_dccm_queue_full_irq *q;
8767 	u32 intr_type, engine_id;
8768 	u64 payload;
8769 
8770 	intr_type = le32_to_cpu(data->intr_type);
8771 	engine_id = le32_to_cpu(data->engine_id);
8772 	payload = le64_to_cpu(data->payload);
8773 
8774 	switch (intr_type) {
8775 	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
8776 		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
8777 
8778 		gaudi2_print_event(hdev, event_type, true,
8779 				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n",
8780 				engine_id, intr_type, q->queue_index);
8781 		return 1;
8782 	default:
8783 		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type\n");
8784 		return 0;
8785 	}
8786 }
8787 
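/*
 * Top-level event queue handler: dispatch each firmware event to its specific
 * handler and accumulate the error count, reset flags and notification mask.
 */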
8788 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
8789 {
8790 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
8791 	bool reset_required = false, is_critical = false;
8792 	u32 index, ctl, reset_flags = HL_DRV_RESET_HARD, error_count = 0;
8793 	u64 event_mask = 0;
8794 	u16 event_type;
8795 
8796 	ctl = le32_to_cpu(eq_entry->hdr.ctl);
8797 	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
8798 
8799 	if (event_type >= GAUDI2_EVENT_SIZE) {
8800 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
8801 				event_type, GAUDI2_EVENT_SIZE - 1);
8802 		return;
8803 	}
8804 
8805 	gaudi2->events_stat[event_type]++;
8806 	gaudi2->events_stat_aggregate[event_type]++;
8807 
8808 	switch (event_type) {
8809 	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
8810 		fallthrough;
8811 	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
8812 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8813 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8814 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8815 		is_critical = eq_entry->ecc_data.is_critical;
8816 		error_count++;
8817 		break;
8818 
8819 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
8820 		fallthrough;
8821 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8822 		fallthrough;
8823 	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
8824 		error_count = gaudi2_handle_qman_err(hdev, event_type);
8825 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8826 		break;
8827 
8828 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
8829 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8830 		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
8831 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8832 		break;
8833 
8834 	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
8835 		error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
8836 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8837 		break;
8838 
8839 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8840 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8841 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8842 		error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
8843 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8844 		break;
8845 
8846 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8847 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8848 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8849 		error_count = gaudi2_handle_rot_err(hdev, index, event_type,
8850 					&eq_entry->razwi_with_intr_cause, &event_mask);
8851 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
8852 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8853 		break;
8854 
8855 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8856 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8857 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
8858 						&eq_entry->razwi_with_intr_cause, &event_mask);
8859 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
8860 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8861 		break;
8862 
8863 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
8864 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
8865 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
8866 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8867 		break;
8868 
8869 	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
8870 	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
8871 	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
8872 	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
8873 	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
8874 	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
8875 	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
8876 	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
8877 	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
8878 	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
8879 	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
8880 	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
8881 	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
8882 	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
8883 	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
8884 	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
8885 	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
8886 	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
8887 	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
8888 	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
8889 	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
8890 	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
8891 	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
8892 	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
8893 	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
8894 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
8895 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
8896 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
8897 					&eq_entry->razwi_with_intr_cause, &event_mask);
8898 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8899 		break;
8900 
8901 	case GAUDI2_EVENT_DEC0_SPI:
8902 	case GAUDI2_EVENT_DEC1_SPI:
8903 	case GAUDI2_EVENT_DEC2_SPI:
8904 	case GAUDI2_EVENT_DEC3_SPI:
8905 	case GAUDI2_EVENT_DEC4_SPI:
8906 	case GAUDI2_EVENT_DEC5_SPI:
8907 	case GAUDI2_EVENT_DEC6_SPI:
8908 	case GAUDI2_EVENT_DEC7_SPI:
8909 	case GAUDI2_EVENT_DEC8_SPI:
8910 	case GAUDI2_EVENT_DEC9_SPI:
8911 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
8912 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
8913 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
8914 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8915 		break;
8916 
8917 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8918 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8919 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8920 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8921 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8922 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8923 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8924 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
8925 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
8926 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8927 		break;
8928 
8929 	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
8930 	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
8931 	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
8932 	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
8933 		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
8934 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
8935 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
8936 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
8937 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8938 		break;
8939 
8940 	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
8941 	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
8942 	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
8943 	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
8944 		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
8945 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
8946 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
8947 		error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
8948 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8949 		break;
8950 
8951 	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
8952 	case GAUDI2_EVENT_KDMA0_CORE:
8953 		error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
8954 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8955 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8956 		break;
8957 
8958 	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE:
8959 		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
8960 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8961 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8962 		break;
8963 
8964 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
8965 		error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
8966 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
8967 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8968 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8969 		break;
8970 
8971 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8972 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8973 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8974 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8975 		error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
8976 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8977 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8978 		break;
8979 
8980 	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
8981 		error_count = gaudi2_handle_hif_fatal(hdev, event_type,
8982 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8983 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8984 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8985 		break;
8986 
8987 	case GAUDI2_EVENT_PMMU_FATAL_0:
8988 		error_count = gaudi2_handle_pif_fatal(hdev, event_type,
8989 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
8990 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8991 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8992 		break;
8993 
8994 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
8995 		error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
8996 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8997 		break;
8998 
8999 	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9000 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9001 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9002 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9003 			reset_required = true;
9004 		}
9005 		error_count++;
9006 		break;
9007 
9008 	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9009 		error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9010 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9011 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9012 		break;
9013 
9014 	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9015 		error_count = gaudi2_handle_hbm_mc_spi(hdev,
9016 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9017 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9018 		break;
9019 
9020 	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
9021 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
9022 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9023 		break;
9024 
9025 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
9026 		error_count = gaudi2_handle_psoc_drain(hdev,
9027 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9028 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9029 		break;
9030 
9031 	case GAUDI2_EVENT_CPU_AXI_ECC:
9032 		error_count = GAUDI2_NA_EVENT_CAUSE;
9033 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9034 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9035 		break;
9036 	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
9037 		error_count = GAUDI2_NA_EVENT_CAUSE;
9038 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9039 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9040 		break;
9041 	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9042 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9043 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9044 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
9045 		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
9046 						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9047 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9048 		break;
9049 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9050 		error_count = GAUDI2_NA_EVENT_CAUSE;
9051 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9052 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9053 		break;
9054 	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9055 		error_count = GAUDI2_NA_EVENT_CAUSE;
9056 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9057 		break;
9058 	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9059 		error_count = GAUDI2_NA_EVENT_CAUSE;
9060 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9061 		break;
9062 	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9063 		error_count = GAUDI2_NA_EVENT_CAUSE;
9064 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9065 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9066 		break;
9067 	case GAUDI2_EVENT_PCIE_FATAL_ERR:
9068 		error_count = GAUDI2_NA_EVENT_CAUSE;
9069 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9070 		break;
9071 	case GAUDI2_EVENT_TPC0_BMON_SPMU:
9072 	case GAUDI2_EVENT_TPC1_BMON_SPMU:
9073 	case GAUDI2_EVENT_TPC2_BMON_SPMU:
9074 	case GAUDI2_EVENT_TPC3_BMON_SPMU:
9075 	case GAUDI2_EVENT_TPC4_BMON_SPMU:
9076 	case GAUDI2_EVENT_TPC5_BMON_SPMU:
9077 	case GAUDI2_EVENT_TPC6_BMON_SPMU:
9078 	case GAUDI2_EVENT_TPC7_BMON_SPMU:
9079 	case GAUDI2_EVENT_TPC8_BMON_SPMU:
9080 	case GAUDI2_EVENT_TPC9_BMON_SPMU:
9081 	case GAUDI2_EVENT_TPC10_BMON_SPMU:
9082 	case GAUDI2_EVENT_TPC11_BMON_SPMU:
9083 	case GAUDI2_EVENT_TPC12_BMON_SPMU:
9084 	case GAUDI2_EVENT_TPC13_BMON_SPMU:
9085 	case GAUDI2_EVENT_TPC14_BMON_SPMU:
9086 	case GAUDI2_EVENT_TPC15_BMON_SPMU:
9087 	case GAUDI2_EVENT_TPC16_BMON_SPMU:
9088 	case GAUDI2_EVENT_TPC17_BMON_SPMU:
9089 	case GAUDI2_EVENT_TPC18_BMON_SPMU:
9090 	case GAUDI2_EVENT_TPC19_BMON_SPMU:
9091 	case GAUDI2_EVENT_TPC20_BMON_SPMU:
9092 	case GAUDI2_EVENT_TPC21_BMON_SPMU:
9093 	case GAUDI2_EVENT_TPC22_BMON_SPMU:
9094 	case GAUDI2_EVENT_TPC23_BMON_SPMU:
9095 	case GAUDI2_EVENT_TPC24_BMON_SPMU:
9096 	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9097 	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9098 	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9099 	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9100 	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9101 	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9102 	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9103 	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9104 	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9105 	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9106 	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9107 	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9108 	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9109 		fallthrough;
9110 	case GAUDI2_EVENT_DEC0_BMON_SPMU:
9111 	case GAUDI2_EVENT_DEC1_BMON_SPMU:
9112 	case GAUDI2_EVENT_DEC2_BMON_SPMU:
9113 	case GAUDI2_EVENT_DEC3_BMON_SPMU:
9114 	case GAUDI2_EVENT_DEC4_BMON_SPMU:
9115 	case GAUDI2_EVENT_DEC5_BMON_SPMU:
9116 	case GAUDI2_EVENT_DEC6_BMON_SPMU:
9117 	case GAUDI2_EVENT_DEC7_BMON_SPMU:
9118 	case GAUDI2_EVENT_DEC8_BMON_SPMU:
9119 	case GAUDI2_EVENT_DEC9_BMON_SPMU:
9120 	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9121 		error_count = GAUDI2_NA_EVENT_CAUSE;
9122 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9123 		break;
9124 
9125 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9126 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9127 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9128 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9129 		gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
9130 		error_count = GAUDI2_NA_EVENT_CAUSE;
9131 		break;
9132 
9133 	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9134 		gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
9135 		error_count = GAUDI2_NA_EVENT_CAUSE;
9136 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9137 		break;
9138 
9139 	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9140 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9141 		error_count = GAUDI2_NA_EVENT_CAUSE;
9142 		/* Do nothing - FW will handle it */
9143 		break;
9144 
9145 	case GAUDI2_EVENT_PCIE_P2P_MSIX:
9146 		error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
9147 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9148 		break;
9149 
9150 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9151 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9152 		error_count = gaudi2_handle_sm_err(hdev, event_type, index);
9153 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9154 		break;
9155 
9156 	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9157 		error_count = GAUDI2_NA_EVENT_CAUSE;
9158 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9159 		break;
9160 
9161 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9162 		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9163 						le64_to_cpu(eq_entry->data[0]));
9164 		error_count = GAUDI2_NA_EVENT_CAUSE;
9165 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9166 		break;
9167 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9168 		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9169 						le64_to_cpu(eq_entry->data[0]));
9170 		error_count = GAUDI2_NA_EVENT_CAUSE;
9171 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9172 		break;
9173 
9174 	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9175 		gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
9176 		error_count = GAUDI2_NA_EVENT_CAUSE;
9177 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9178 		break;
9179 
9180 	case GAUDI2_EVENT_ARC_DCCM_FULL:
9181 		error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
9182 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9183 		break;
9184 
9185 	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9186 	case GAUDI2_EVENT_DEV_RESET_REQ:
9187 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9188 		error_count = GAUDI2_NA_EVENT_CAUSE;
9189 		is_critical = true;
9190 		break;
9191 
9192 	default:
9193 		if (gaudi2_irq_map_table[event_type].valid) {
9194 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
9195 						event_type);
9196 			error_count = GAUDI2_NA_EVENT_CAUSE;
9197 		}
9198 	}
9199 
9200 	/* Make sure to dump an error in case no error cause was printed so far.
9201 	 * Note that although we have counted the errors, we use this number as
9202 	 * a boolean.
9203 	 */
9204 	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
9205 		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
9206 	else if (error_count == 0)
9207 		gaudi2_print_event(hdev, event_type, true,
9208 				"No error cause for H/W event %u\n", event_type);
9209 
9210 	if ((gaudi2_irq_map_table[event_type].reset || reset_required) &&
9211 				(hdev->hard_reset_on_fw_events ||
9212 				(hdev->asic_prop.fw_security_enabled && is_critical)))
9213 		goto reset_device;
9214 
9215 	/* Send unmask irq only for interrupts not classified as MSG */
9216 	if (!gaudi2_irq_map_table[event_type].msg)
9217 		hl_fw_unmask_irq(hdev, event_type);
9218 
9219 	if (event_mask)
9220 		hl_notifier_event_send_all(hdev, event_mask);
9221 
9222 	return;
9223 
9224 reset_device:
9225 	if (hdev->asic_prop.fw_security_enabled && is_critical) {
9226 		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
9227 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
9228 	} else {
9229 		reset_flags |= HL_DRV_RESET_DELAY;
9230 	}
9231 	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
9232 	hl_device_cond_reset(hdev, reset_flags, event_mask);
9233 }
9234 
9235 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
9236 			struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
9237 			u32 hw_queue_id, u32 size, u64 addr, u32 val)
9238 {
9239 	u32 ctl, pkt_size;
9240 	int rc = 0;
9241 
9242 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
9243 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
9244 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
9245 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
9246 
9247 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
9248 	lin_dma_pkt->src_addr = cpu_to_le64(val);
9249 	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
9250 	lin_dma_pkt->tsize = cpu_to_le32(size);
9251 
9252 	pkt_size = sizeof(struct packet_lin_dma);
9253 
9254 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
9255 	if (rc)
9256 		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
9257 				hw_queue_id);
9258 
9259 	return rc;
9260 }
9261 
9262 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
9263 {
9264 	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
9265 					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
9266 					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
9267 					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
9268 	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
9269 		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
9270 	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
9271 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9272 	void *lin_dma_pkts_arr;
9273 	dma_addr_t pkt_dma_addr;
9274 	int rc = 0, dma_num = 0;
9275 
9276 	if (prop->edma_enabled_mask == 0) {
9277 		dev_info(hdev->dev, "none of the EDMA engines is enabled - skipping DRAM scrubbing\n");
9278 		return -EIO;
9279 	}
9280 
9281 	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9282 	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
9283 	comp_addr = CFG_BASE + sob_addr;
9284 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
9285 		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
9286 	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
9287 		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
9288 
9289 	/* Calculate how many LIN_DMA packets will be needed */
9290 	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
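	/*
	 * For example, a 5GB scrub size rounds up to 6GB and therefore needs 3 packets;
	 * the last chunk is clamped to the remaining 1GB by the min_t() in the loop below.
	 */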
9291 	pkt_size = sizeof(struct packet_lin_dma);
9292 
9293 	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
9294 					&pkt_dma_addr, GFP_KERNEL);
9295 	if (!lin_dma_pkts_arr)
9296 		return -ENOMEM;
9297 
9298 	/*
9299 	 * Set MMU bypass for the scrubbing - all EDMAs are configured the same, so save
9300 	 * only the first engine's value to restore later.
9301 	 * Also set the SOB address on all EDMA cores for completion.
9302 	 * Set the QM as trusted to allow it to access physical addresses with MMU bypass.
9303 	 */
9304 	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
9305 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9306 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9307 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9308 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9309 
9310 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9311 				continue;
9312 
9313 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
9314 					edma_offset, mmubp);
9315 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
9316 					lower_32_bits(comp_addr));
9317 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
9318 					upper_32_bits(comp_addr));
9319 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
9320 					comp_val);
9321 			gaudi2_qman_set_test_mode(hdev,
9322 					edma_queues_id[dcore] + 4 * edma_idx, true);
9323 		}
9324 	}
9325 
9326 	WREG32(sob_addr, 0);
9327 
9328 	while (cur_addr < end_addr) {
9329 		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9330 			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9331 				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9332 
9333 				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9334 					continue;
9335 
9336 				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
9337 
9338 				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
9339 					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
9340 					pkt_dma_addr + dma_num * pkt_size,
9341 					edma_queues_id[dcore] + edma_idx * 4,
9342 					chunk_size, cur_addr, val);
9343 				if (rc)
9344 					goto end;
9345 
9346 				dma_num++;
9347 				cur_addr += chunk_size;
9348 				if (cur_addr == end_addr)
9349 					break;
9350 			}
9351 		}
9352 	}
9353 
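	/*
	 * Each EDMA write-completion increments the SOB by 1 (comp_val above), so the SOB
	 * reaching dma_num means that every LIN_DMA packet that was sent has completed.
	 */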
9354 	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
9355 	if (rc) {
9356 		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
9357 		goto end;
9358 	}
9359 end:
9360 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9361 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9362 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9363 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9364 
9365 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9366 				continue;
9367 
9368 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
9369 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
9370 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
9371 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
9372 			gaudi2_qman_set_test_mode(hdev,
9373 					edma_queues_id[dcore] + 4 * edma_idx, false);
9374 		}
9375 	}
9376 
9377 	WREG32(sob_addr, 0);
9378 	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
9379 
9380 	return rc;
9381 }
9382 
9383 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
9384 {
9385 	int rc;
9386 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9387 	u64 size = prop->dram_end_address - prop->dram_user_base_address;
9388 
9389 	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
9390 
9391 	if (rc)
9392 		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
9393 				prop->dram_user_base_address, size);
9394 	return rc;
9395 }
9396 
9397 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
9398 {
9399 	int rc;
9400 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9401 	u64 val = hdev->memory_scrub_val;
9402 	u64 addr, size;
9403 
9404 	if (!hdev->memory_scrub)
9405 		return 0;
9406 
9407 	/* scrub SRAM */
9408 	addr = prop->sram_user_base_address;
9409 	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
9410 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
9411 			addr, addr + size, val);
9412 	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
9413 	if (rc) {
9414 		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
9415 		return rc;
9416 	}
9417 
9418 	/* scrub DRAM */
9419 	rc = gaudi2_scrub_device_dram(hdev, val);
9420 	if (rc) {
9421 		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
9422 		return rc;
9423 	}
9424 	return 0;
9425 }
9426 
9427 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
9428 {
9429 	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
9430 		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
9431 	u32 val, size, offset;
9432 	int dcore_id;
9433 
9434 	offset = hdev->asic_prop.first_available_cq[0] * 4;
9435 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
9436 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
9437 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
9438 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
9439 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
9440 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
9441 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
9442 			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
9443 
9444 	/* memset dcore0 CQ registers */
9445 	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9446 	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9447 	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9448 	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9449 	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9450 	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9451 
9452 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
9453 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
9454 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
9455 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
9456 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
9457 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
9458 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
9459 
9460 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9461 		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9462 		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9463 		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9464 		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9465 		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9466 		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9467 
9468 		cq_lbw_l_addr += DCORE_OFFSET;
9469 		cq_lbw_h_addr += DCORE_OFFSET;
9470 		cq_lbw_data_addr += DCORE_OFFSET;
9471 		cq_base_l_addr += DCORE_OFFSET;
9472 		cq_base_h_addr += DCORE_OFFSET;
9473 		cq_size_addr += DCORE_OFFSET;
9474 	}
9475 
9476 	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
9477 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
9478 	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
9479 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
9480 
9481 	/* memset dcore0 monitors */
9482 	gaudi2_memset_device_lbw(hdev, addr, size, val);
9483 
9484 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
9485 	gaudi2_memset_device_lbw(hdev, addr, size, 0);
9486 
9487 	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
9488 	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
9489 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
9490 
9491 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9492 		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
9493 		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
9494 		mon_sts_addr += DCORE_OFFSET;
9495 		mon_cfg_addr += DCORE_OFFSET;
9496 	}
9497 
9498 	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9499 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
9500 	val = 0;
9501 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
9502 			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9503 
9504 	/* memset dcore0 sobs */
9505 	gaudi2_memset_device_lbw(hdev, addr, size, val);
9506 
9507 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
9508 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
9509 
9510 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9511 		gaudi2_memset_device_lbw(hdev, addr, size, val);
9512 		addr += DCORE_OFFSET;
9513 	}
9514 
9515 	/* Flush all WREG to prevent race */
9516 	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9517 }
9518 
9519 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
9520 {
9521 	u32 reg_base, hw_queue_id;
9522 
9523 	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
9524 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9525 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9526 			continue;
9527 
9528 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9529 
9530 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9531 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9532 	}
9533 
9534 	/* Flush all WREG to prevent race */
9535 	RREG32(mmPDMA0_QM_ARB_CFG_0);
9536 }
9537 
9538 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
9539 {
9540 	u32 reg_base, hw_queue_id;
9541 
9542 	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
9543 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9544 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9545 			continue;
9546 
9547 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9548 
9549 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9550 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9551 	}
9552 
9553 	/* Flush all WREG to prevent race */
9554 	RREG32(mmPDMA0_QM_ARB_CFG_0);
9555 }
9556 
9557 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
9558 {
9559 	return 0;
9560 }
9561 
9562 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
9563 {
9564 }
9565 
9566 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
9567 						struct dup_block_ctx *cfg_ctx)
9568 {
9569 	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
9570 	u8 seq;
9571 	int i;
9572 
9573 	for (i = 0 ; i < cfg_ctx->instances ; i++) {
9574 		seq = block_idx * cfg_ctx->instances + i;
9575 
9576 		/* skip disabled instance */
9577 		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
9578 			continue;
9579 
9580 		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
9581 					cfg_ctx->data);
9582 	}
9583 }
9584 
9585 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
9586 						u64 mask)
9587 {
9588 	int i;
9589 
9590 	cfg_ctx->enabled_mask = mask;
9591 
9592 	for (i = 0 ; i < cfg_ctx->blocks ; i++)
9593 		gaudi2_init_block_instances(hdev, i, cfg_ctx);
9594 }
9595 
9596 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
9597 {
9598 	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
9599 }
9600 
9601 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
9602 {
9603 	void *host_mem_virtual_addr;
9604 	dma_addr_t host_mem_dma_addr;
9605 	u64 reserved_va_base;
9606 	u32 pos, size_left, size_to_dma;
9607 	struct hl_ctx *ctx;
9608 	int rc = 0;
9609 
9610 	/* Fetch the ctx */
9611 	ctx = hl_get_compute_ctx(hdev);
9612 	if (!ctx) {
9613 		dev_err(hdev->dev, "No ctx available\n");
9614 		return -EINVAL;
9615 	}
9616 
9617 	/* Allocate buffers for read and for poll */
9618 	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
9619 								GFP_KERNEL | __GFP_ZERO);
9620 	if (host_mem_virtual_addr == NULL) {
9621 		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
9622 		rc = -ENOMEM;
9623 		goto put_ctx;
9624 	}
9625 
9626 	/* Reserve VM region on asic side */
9627 	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
9628 						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9629 	if (!reserved_va_base) {
9630 		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
9631 		rc = -ENOMEM;
9632 		goto free_data_buffer;
9633 	}
9634 
9635 	/* Create mapping on asic side */
9636 	mutex_lock(&hdev->mmu_lock);
9637 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
9638 	hl_mmu_invalidate_cache_range(hdev, false,
9639 				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
9640 				      ctx->asid, reserved_va_base, SZ_2M);
9641 	mutex_unlock(&hdev->mmu_lock);
9642 	if (rc) {
9643 		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
9644 		goto unreserve_va;
9645 	}
9646 
9647 	/* Enable MMU on KDMA */
9648 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
9649 
9650 	pos = 0;
9651 	size_left = size;
9652 	size_to_dma = SZ_2M;
9653 
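	/*
	 * Copy the region in chunks of up to 2MB: each iteration DMAs the next chunk into
	 * the host bounce buffer through the reserved VA and then copies it into blob_addr.
	 */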
9654 	while (size_left > 0) {
9655 		if (size_left < SZ_2M)
9656 			size_to_dma = size_left;
9657 
9658 		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
9659 		if (rc)
9660 			break;
9661 
9662 		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
9663 
9664 		if (size_left <= SZ_2M)
9665 			break;
9666 
9667 		pos += SZ_2M;
9668 		addr += SZ_2M;
9669 		size_left -= SZ_2M;
9670 	}
9671 
9672 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
9673 
9674 	mutex_lock(&hdev->mmu_lock);
9675 	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
9676 	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
9677 				      ctx->asid, reserved_va_base, SZ_2M);
9678 	mutex_unlock(&hdev->mmu_lock);
9679 unreserve_va:
9680 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
9681 free_data_buffer:
9682 	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
9683 put_ctx:
9684 	hl_ctx_put(ctx);
9685 
9686 	return rc;
9687 }
9688 
9689 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
9690 {
9691 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9692 	int min_alloc_order, rc;
9693 
9694 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9695 		return 0;
9696 
9697 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
9698 								HOST_SPACE_INTERNAL_CB_SZ,
9699 								&hdev->internal_cb_pool_dma_addr,
9700 								GFP_KERNEL | __GFP_ZERO);
9701 
9702 	if (!hdev->internal_cb_pool_virt_addr)
9703 		return -ENOMEM;
9704 
9705 	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
9706 					gaudi2_get_wait_cb_size(hdev)));
9707 
9708 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
9709 	if (!hdev->internal_cb_pool) {
9710 		dev_err(hdev->dev, "Failed to create internal CB pool\n");
9711 		rc = -ENOMEM;
9712 		goto free_internal_cb_pool;
9713 	}
9714 
9715 	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
9716 				HOST_SPACE_INTERNAL_CB_SZ, -1);
9717 	if (rc) {
9718 		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
9719 		rc = -EFAULT;
9720 		goto destroy_internal_cb_pool;
9721 	}
9722 
9723 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
9724 					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9725 
9726 	if (!hdev->internal_cb_va_base) {
9727 		rc = -ENOMEM;
9728 		goto destroy_internal_cb_pool;
9729 	}
9730 
9731 	mutex_lock(&hdev->mmu_lock);
9732 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
9733 					HOST_SPACE_INTERNAL_CB_SZ);
9734 	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
9735 	mutex_unlock(&hdev->mmu_lock);
9736 
9737 	if (rc)
9738 		goto unreserve_internal_cb_pool;
9739 
9740 	return 0;
9741 
9742 unreserve_internal_cb_pool:
9743 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9744 destroy_internal_cb_pool:
9745 	gen_pool_destroy(hdev->internal_cb_pool);
9746 free_internal_cb_pool:
9747 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9748 					hdev->internal_cb_pool_dma_addr);
9749 
9750 	return rc;
9751 }
9752 
9753 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
9754 {
9755 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9756 
9757 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9758 		return;
9759 
9760 	mutex_lock(&hdev->mmu_lock);
9761 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9762 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9763 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
9764 	mutex_unlock(&hdev->mmu_lock);
9765 
9766 	gen_pool_destroy(hdev->internal_cb_pool);
9767 
9768 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9769 					hdev->internal_cb_pool_dma_addr);
9770 }
9771 
9772 static void gaudi2_restore_user_registers(struct hl_device *hdev)
9773 {
9774 	gaudi2_restore_user_sm_registers(hdev);
9775 	gaudi2_restore_user_qm_registers(hdev);
9776 }
9777 
9778 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9779 {
9780 	struct hl_device *hdev = ctx->hdev;
9781 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9782 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9783 	int rc;
9784 
9785 	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9786 				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
9787 	if (rc)
9788 		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
9789 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9790 
9791 	return rc;
9792 }
9793 
9794 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9795 {
9796 	struct hl_device *hdev = ctx->hdev;
9797 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9798 	int rc;
9799 
9800 	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9801 				prop->pmmu.page_size, true);
9802 	if (rc)
9803 		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
9804 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9805 }
9806 
9807 static int gaudi2_ctx_init(struct hl_ctx *ctx)
9808 {
9809 	int rc;
9810 
9811 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
9812 	if (rc)
9813 		return rc;
9814 
9815 	/* No need to clear the user registers if the device has just
9816 	 * performed a reset; in that case restore only the NIC QM registers.
9817 	 */
9818 	if (ctx->hdev->reset_upon_device_release)
9819 		gaudi2_restore_nic_qm_registers(ctx->hdev);
9820 	else
9821 		gaudi2_restore_user_registers(ctx->hdev);
9822 
9823 	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
9824 	if (rc)
9825 		return rc;
9826 
9827 	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
9828 	if (rc)
9829 		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9830 
9831 	return rc;
9832 }
9833 
9834 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
9835 {
9836 	if (ctx->asid == HL_KERNEL_ASID_ID)
9837 		return;
9838 
9839 	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9840 
9841 	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
9842 }
9843 
9844 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
9845 {
9846 	struct hl_device *hdev = cs->ctx->hdev;
9847 	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
9848 	u32 mon_payload, sob_id, mon_id;
9849 
9850 	if (!cs_needs_completion(cs))
9851 		return 0;
9852 
9853 	/*
9854 	 * The first 64 SOB/MON pairs are reserved for the driver for the QMAN
9855 	 * auto-completion mechanism. Each SOB/MON pair is used for a pending CS with
9856 	 * the same cyclic index. The SOB value is increased when each of the CS jobs
9857 	 * is completed. When the SOB reaches the number of CS jobs, the monitor
9858 	 * generates an MSI-X interrupt.
9859 	 */
9860 
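	/*
	 * For example, assuming max_pending_cs is 64, a CS with sequence 70 gets
	 * index 70 & 63 = 6, so SOB 6 / MON 6 are armed and the monitor payload marks
	 * shadow index 6 as ready once the SOB reaches cs->jobs_cnt.
	 */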
9861 	sob_id = mon_id = index;
9862 	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
9863 				(1 << CQ_ENTRY_READY_SHIFT) | index;
9864 
9865 	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
9866 				cs->jobs_cnt);
9867 
9868 	return 0;
9869 }
9870 
9871 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
9872 {
9873 	return HL_INVALID_QUEUE;
9874 }
9875 
9876 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
9877 {
9878 	struct hl_cb *cb = data;
9879 	struct packet_msg_short *pkt;
9880 	u32 value, ctl, pkt_size = sizeof(*pkt);
9881 
9882 	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
9883 	memset(pkt, 0, pkt_size);
9884 
9885 	/* Inc by 1, Mode ADD */
9886 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
9887 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
9888 
9889 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
9890 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
9891 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9892 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
9893 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9894 
9895 	pkt->value = cpu_to_le32(value);
9896 	pkt->ctl = cpu_to_le32(ctl);
9897 
9898 	return size + pkt_size;
9899 }
9900 
9901 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
9902 {
9903 	u32 ctl, pkt_size = sizeof(*pkt);
9904 
9905 	memset(pkt, 0, pkt_size);
9906 
9907 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
9908 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
9909 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9910 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9911 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
9912 
9913 	pkt->value = cpu_to_le32(value);
9914 	pkt->ctl = cpu_to_le32(ctl);
9915 
9916 	return pkt_size;
9917 }
9918 
9919 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
9920 					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
9921 {
9922 	u32 ctl, value, pkt_size = sizeof(*pkt);
9923 	u8 mask;
9924 
9925 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
9926 		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
9927 		return 0;
9928 	}
9929 
9930 	memset(pkt, 0, pkt_size);
9931 
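	/*
	 * The monitor is armed on a group of 8 SOBs: sob_base / 8 selects the sync group
	 * and 'mask' (built by hl_gen_sob_mask() above) selects the SOBs within it.
	 */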
9932 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
9933 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
9934 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
9935 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
9936 
9937 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
9938 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
9939 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9940 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9941 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9942 
9943 	pkt->value = cpu_to_le32(value);
9944 	pkt->ctl = cpu_to_le32(ctl);
9945 
9946 	return pkt_size;
9947 }
9948 
9949 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
9950 {
9951 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
9952 
9953 	memset(pkt, 0, pkt_size);
9954 
9955 	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
9956 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
9957 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
9958 
9959 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
9960 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9961 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9962 
9963 	pkt->cfg = cpu_to_le32(cfg);
9964 	pkt->ctl = cpu_to_le32(ctl);
9965 
9966 	return pkt_size;
9967 }
9968 
9969 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
9970 {
9971 	struct hl_cb *cb = prop->data;
9972 	void *buf = (void *) (uintptr_t) (cb->kernel_address);
9973 
9974 	u64 monitor_base, fence_addr = 0;
9975 	u32 stream_index, size = prop->size;
9976 	u16 msg_addr_offset;
9977 
9978 	stream_index = prop->q_idx % 4;
9979 	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
9980 			QM_FENCE2_OFFSET + stream_index * 4;
9981 
9982 	/*
9983 	 * monitor_base should be the content of the base0 address registers,
9984 	 * so it will be added to the msg short offsets
9985 	 */
9986 	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9987 
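	/*
	 * The wait CB is built as: three MSG_SHORT packets that program the monitor's
	 * payload address (low/high) and payload data, one MSG_SHORT packet that arms the
	 * monitor on the SOB range, and a final FENCE packet that blocks the stream until
	 * the monitor writes the payload to the QM fence counter.
	 */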
9988 	/* First monitor config packet: low address of the sync */
9989 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
9990 				monitor_base;
9991 
9992 	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
9993 
9994 	/* Second monitor config packet: high address of the sync */
9995 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
9996 				monitor_base;
9997 
9998 	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
9999 
10000 	/*
10001 	 * Third monitor config packet: the payload, i.e. what to write when the
10002 	 * sync triggers
10003 	 */
10004 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
10005 				monitor_base;
10006 
10007 	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
10008 
10009 	/* Fourth monitor config packet: bind the monitor to a sync object */
10010 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
10011 
10012 	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
10013 						prop->sob_val, msg_addr_offset);
10014 
10015 	/* Fence packet */
10016 	size += gaudi2_add_fence_pkt(buf + size);
10017 
10018 	return size;
10019 }
10020 
10021 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
10022 {
10023 	struct hl_hw_sob *hw_sob = data;
10024 
10025 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
10026 
10027 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
10028 
10029 	kref_init(&hw_sob->kref);
10030 }
10031 
10032 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
10033 {
10034 }
10035 
10036 static u64 gaudi2_get_device_time(struct hl_device *hdev)
10037 {
10038 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
10039 
10040 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
10041 }
10042 
10043 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
10044 {
10045 	return 0;
10046 }
10047 
10048 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
10049 					struct hl_cs *cs, u32 wait_queue_id,
10050 					u32 collective_engine_id, u32 encaps_signal_offset)
10051 {
10052 	return -EINVAL;
10053 }
10054 
10055 /*
10056  * gaudi2_mmu_scramble_addr() - converts a DRAM (non-power-of-2) page-size aligned
10057  *                              address to a DMMU page-size (64MB) address before
10058  *                              mapping it in the MMU.
10059  * The operation is performed on both the virtual and physical addresses.
10060  * For a device with 6 HBMs the scramble is:
10061  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
10062  *
10063  * Example:
10064  * =============================================================================
10065  * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
10066  * Phys address                                                     in MMU last
10067  *                                                                    HOP
10068  * =============================================================================
10069  * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
10070  * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
10071  * =============================================================================
10072  */
10073 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
10074 {
10075 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10076 	u32 divisor, mod_va;
10077 	u64 div_va;
10078 
10079 	/* accept any address in the DRAM address space */
10080 	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
10081 									VA_HBM_SPACE_END)) {
10082 
10083 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10084 		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
10085 		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
10086 			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
10087 			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
10088 	}
10089 
10090 	return raw_addr;
10091 }
10092 
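/*
 * gaudi2_mmu_descramble_addr() - the inverse of gaudi2_mmu_scramble_addr(). For a
 * device with 6 HBMs this is (addr / 64M) * 48M + addr % 64M, which restores the
 * original DRAM address.
 */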
10093 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
10094 {
10095 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10096 	u32 divisor, mod_va;
10097 	u64 div_va;
10098 
10099 	/* accept any address in the DRAM address space */
10100 	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
10101 									VA_HBM_SPACE_END)) {
10102 
10103 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10104 		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
10105 					PAGE_SIZE_64MB, &mod_va);
10106 
10107 		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
10108 					(div_va * divisor + mod_va));
10109 	}
10110 
10111 	return scrambled_addr;
10112 }
10113 
10114 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
10115 {
10116 	u32 base = 0, dcore_id, dec_id;
10117 
10118 	if (core_id >= NUMBER_OF_DEC) {
10119 		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
10120 		goto out;
10121 	}
10122 
10123 	if (core_id < 8) {
10124 		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
10125 		dec_id = core_id % NUM_OF_DEC_PER_DCORE;
10126 
10127 		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
10128 				dec_id * DCORE_VDEC_OFFSET;
10129 	} else {
10130 		/* PCIe Shared Decoder */
10131 		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
10132 	}
10133 out:
10134 	return base;
10135 }
10136 
10137 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
10138 				u32 *block_size, u32 *block_id)
10139 {
10140 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10141 	int i;
10142 
10143 	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
10144 		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
10145 			*block_id = i;
10146 			if (block_size)
10147 				*block_size = gaudi2->mapped_blocks[i].size;
10148 			return 0;
10149 		}
10150 	}
10151 
10152 	dev_err(hdev->dev, "Invalid block address %#llx\n", block_addr);
10153 
10154 	return -EINVAL;
10155 }
10156 
10157 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
10158 			u32 block_id, u32 block_size)
10159 {
10160 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10161 	u64 offset_in_bar;
10162 	u64 address;
10163 	int rc;
10164 
10165 	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
10166 		dev_err(hdev->dev, "Invalid block id %u\n", block_id);
10167 		return -EINVAL;
10168 	}
10169 
10170 	/* we allow mapping only an entire block */
10171 	if (block_size != gaudi2->mapped_blocks[block_id].size) {
10172 		dev_err(hdev->dev, "Invalid block size %u\n", block_size);
10173 		return -EINVAL;
10174 	}
10175 
10176 	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
10177 
10178 	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
10179 
10180 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
10181 			VM_DONTCOPY | VM_NORESERVE;
10182 
10183 	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
10184 			block_size, vma->vm_page_prot);
10185 	if (rc)
10186 		dev_err(hdev->dev, "remap_pfn_range error %d\n", rc);
10187 
10188 	return rc;
10189 }
10190 
10191 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
10192 {
10193 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10194 
10195 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
10196 	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
10197 
10198 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
10199 		WREG32(irq_handler_offset,
10200 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
10201 }
10202 
10203 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
10204 {
10205 	switch (mmu_id) {
10206 	case HW_CAP_DCORE0_DMMU0:
10207 		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
10208 		break;
10209 	case HW_CAP_DCORE0_DMMU1:
10210 		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
10211 		break;
10212 	case HW_CAP_DCORE0_DMMU2:
10213 		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
10214 		break;
10215 	case HW_CAP_DCORE0_DMMU3:
10216 		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
10217 		break;
10218 	case HW_CAP_DCORE1_DMMU0:
10219 		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
10220 		break;
10221 	case HW_CAP_DCORE1_DMMU1:
10222 		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
10223 		break;
10224 	case HW_CAP_DCORE1_DMMU2:
10225 		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
10226 		break;
10227 	case HW_CAP_DCORE1_DMMU3:
10228 		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
10229 		break;
10230 	case HW_CAP_DCORE2_DMMU0:
10231 		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
10232 		break;
10233 	case HW_CAP_DCORE2_DMMU1:
10234 		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
10235 		break;
10236 	case HW_CAP_DCORE2_DMMU2:
10237 		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
10238 		break;
10239 	case HW_CAP_DCORE2_DMMU3:
10240 		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
10241 		break;
10242 	case HW_CAP_DCORE3_DMMU0:
10243 		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
10244 		break;
10245 	case HW_CAP_DCORE3_DMMU1:
10246 		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
10247 		break;
10248 	case HW_CAP_DCORE3_DMMU2:
10249 		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
10250 		break;
10251 	case HW_CAP_DCORE3_DMMU3:
10252 		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
10253 		break;
10254 	case HW_CAP_PMMU:
10255 		*mmu_base = mmPMMU_HBW_MMU_BASE;
10256 		break;
10257 	default:
10258 		return -EINVAL;
10259 	}
10260 
10261 	return 0;
10262 }
10263 
10264 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
10265 {
10266 	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
10267 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10268 	u32 mmu_base;
10269 
10270 	if (!(gaudi2->hw_cap_initialized & mmu_id))
10271 		return;
10272 
10273 	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
10274 		return;
10275 
10276 	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
10277 	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
10278 }
10279 
10280 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
10281 {
10282 	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
10283 
10284 	/* check all HMMUs */
10285 	for (i = 0 ; i < num_of_hmmus ; i++) {
10286 		mmu_id = HW_CAP_DCORE0_DMMU0 << i;
10287 
10288 		if (mmu_cap_mask & mmu_id)
10289 			gaudi2_ack_mmu_error(hdev, mmu_id);
10290 	}
10291 
10292 	/* check PMMU */
10293 	if (mmu_cap_mask & HW_CAP_PMMU)
10294 		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
10295 
10296 	return 0;
10297 }
10298 
10299 static void gaudi2_get_msi_info(__le32 *table)
10300 {
10301 	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
10302 }
10303 
10304 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
10305 {
10306 	switch (pll_idx) {
10307 	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
10308 	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
10309 	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
10310 	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
10311 	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
10312 	case HL_GAUDI2_MME_PLL: return MME_PLL;
10313 	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
10314 	case HL_GAUDI2_IF_PLL: return IF_PLL;
10315 	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
10316 	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
10317 	case HL_GAUDI2_VID_PLL: return VID_PLL;
10318 	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
10319 	default: return -EINVAL;
10320 	}
10321 }
10322 
10323 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
10324 {
10325 	/* Not implemented */
10326 	return 0;
10327 }
10328 
10329 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
10330 {
10331 	/* Not implemented */
10332 	return 0;
10333 }
10334 
10335 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
10336 				struct hl_device *hdev, struct hl_mon_state_dump *mon)
10337 {
10338 	/* Not implemented */
10339 	return 0;
10340 }
10341 
10342 
10343 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
10344 				u64 status_base_offset, enum hl_sync_engine_type engine_type,
10345 				u32 engine_id, char **buf, size_t *size, size_t *offset)
10346 {
10347 	/* Not implemented */
10348 	return 0;
10349 }
10350 
10351 
10352 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
10353 	.monitor_valid = gaudi2_monitor_valid,
10354 	.print_single_monitor = gaudi2_print_single_monitor,
10355 	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
10356 	.print_fences_single_engine = gaudi2_print_fences_single_engine,
10357 };
10358 
10359 static void gaudi2_state_dump_init(struct hl_device *hdev)
10360 {
10361 	/* Not implemented */
10362 	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
10363 	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
10364 }
10365 
10366 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
10367 {
10368 	return 0;
10369 }
10370 
10371 static u32 *gaudi2_get_stream_master_qid_arr(void)
10372 {
10373 	return NULL;
10374 }
10375 
10376 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
10377 				struct attribute_group *dev_vrm_attr_grp)
10378 {
10379 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
10380 	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
10381 }
10382 
10383 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
10384 					u32 page_size, u32 *real_page_size, bool is_dram_addr)
10385 {
10386 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10387 
10388 	/* for host pages the page size must be a multiple of the MMU page size */
10389 	if (!is_dram_addr) {
10390 		if (page_size % mmu_prop->page_size)
10391 			goto page_size_err;
10392 
10393 		*real_page_size = mmu_prop->page_size;
10394 		return 0;
10395 	}
10396 
10397 	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
10398 		goto page_size_err;
10399 
10400 	/*
10401 	 * The MMU page size is different from the DRAM page size (more precisely, the
10402 	 * DMMU page is greater than the DRAM page size).
10403 	 * For this reason, work with the DRAM page size and let the MMU scrambling routine
10404 	 * handle this mismatch when calculating the address to place in the MMU page table
10405 	 * (which is why the check above also makes sure that dram_page_size is not greater
10406 	 * than the MMU page size).
10407 	 */
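	/*
	 * For example, with 6 functional HBMs the DRAM page granularity is 48MB while the
	 * DMMU page size is 64MB, so a 48MB-aligned DRAM address is scrambled into a
	 * 64MB-aligned entry by gaudi2_mmu_scramble_addr().
	 */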
10408 	*real_page_size = prop->dram_page_size;
10409 
10410 	return 0;
10411 
10412 page_size_err:
10413 	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
10414 							page_size, mmu_prop->page_size >> 10);
10415 	return -EFAULT;
10416 }
10417 
10418 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
10419 {
10420 	return -EOPNOTSUPP;
10421 }
10422 
10423 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
10424 {
10425 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10426 
10427 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
10428 		return 0;
10429 
10430 	return hl_fw_send_device_activity(hdev, open);
10431 }
10432 
10433 static const struct hl_asic_funcs gaudi2_funcs = {
10434 	.early_init = gaudi2_early_init,
10435 	.early_fini = gaudi2_early_fini,
10436 	.late_init = gaudi2_late_init,
10437 	.late_fini = gaudi2_late_fini,
10438 	.sw_init = gaudi2_sw_init,
10439 	.sw_fini = gaudi2_sw_fini,
10440 	.hw_init = gaudi2_hw_init,
10441 	.hw_fini = gaudi2_hw_fini,
10442 	.halt_engines = gaudi2_halt_engines,
10443 	.suspend = gaudi2_suspend,
10444 	.resume = gaudi2_resume,
10445 	.mmap = gaudi2_mmap,
10446 	.ring_doorbell = gaudi2_ring_doorbell,
10447 	.pqe_write = gaudi2_pqe_write,
10448 	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
10449 	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
10450 	.scrub_device_mem = gaudi2_scrub_device_mem,
10451 	.scrub_device_dram = gaudi2_scrub_device_dram,
10452 	.get_int_queue_base = NULL,
10453 	.test_queues = gaudi2_test_queues,
10454 	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
10455 	.asic_dma_pool_free = gaudi2_dma_pool_free,
10456 	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
10457 	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
10458 	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
10459 	.asic_dma_map_single = gaudi2_dma_map_single,
10460 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
10461 	.cs_parser = gaudi2_cs_parser,
10462 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
10463 	.add_end_of_cb_packets = NULL,
10464 	.update_eq_ci = gaudi2_update_eq_ci,
10465 	.context_switch = gaudi2_context_switch,
10466 	.restore_phase_topology = gaudi2_restore_phase_topology,
10467 	.debugfs_read_dma = gaudi2_debugfs_read_dma,
10468 	.add_device_attr = gaudi2_add_device_attr,
10469 	.handle_eqe = gaudi2_handle_eqe,
10470 	.get_events_stat = gaudi2_get_events_stat,
10471 	.read_pte = NULL,
10472 	.write_pte = NULL,
10473 	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
10474 	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
10475 	.mmu_prefetch_cache_range = NULL,
10476 	.send_heartbeat = gaudi2_send_heartbeat,
10477 	.debug_coresight = gaudi2_debug_coresight,
10478 	.is_device_idle = gaudi2_is_device_idle,
10479 	.compute_reset_late_init = gaudi2_compute_reset_late_init,
10480 	.hw_queues_lock = gaudi2_hw_queues_lock,
10481 	.hw_queues_unlock = gaudi2_hw_queues_unlock,
10482 	.get_pci_id = gaudi2_get_pci_id,
10483 	.get_eeprom_data = gaudi2_get_eeprom_data,
10484 	.get_monitor_dump = gaudi2_get_monitor_dump,
10485 	.send_cpu_message = gaudi2_send_cpu_message,
10486 	.pci_bars_map = gaudi2_pci_bars_map,
10487 	.init_iatu = gaudi2_init_iatu,
10488 	.rreg = hl_rreg,
10489 	.wreg = hl_wreg,
10490 	.halt_coresight = gaudi2_halt_coresight,
10491 	.ctx_init = gaudi2_ctx_init,
10492 	.ctx_fini = gaudi2_ctx_fini,
10493 	.pre_schedule_cs = gaudi2_pre_schedule_cs,
10494 	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
10495 	.load_firmware_to_device = NULL,
10496 	.load_boot_fit_to_device = NULL,
10497 	.get_signal_cb_size = gaudi2_get_signal_cb_size,
10498 	.get_wait_cb_size = gaudi2_get_wait_cb_size,
10499 	.gen_signal_cb = gaudi2_gen_signal_cb,
10500 	.gen_wait_cb = gaudi2_gen_wait_cb,
10501 	.reset_sob = gaudi2_reset_sob,
10502 	.reset_sob_group = gaudi2_reset_sob_group,
10503 	.get_device_time = gaudi2_get_device_time,
10504 	.pb_print_security_errors = gaudi2_pb_print_security_errors,
10505 	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
10506 	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
10507 	.get_dec_base_addr = gaudi2_get_dec_base_addr,
10508 	.scramble_addr = gaudi2_mmu_scramble_addr,
10509 	.descramble_addr = gaudi2_mmu_descramble_addr,
10510 	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
10511 	.get_hw_block_id = gaudi2_get_hw_block_id,
10512 	.hw_block_mmap = gaudi2_block_mmap,
10513 	.enable_events_from_fw = gaudi2_enable_events_from_fw,
10514 	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
10515 	.get_msi_info = gaudi2_get_msi_info,
10516 	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
10517 	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
10518 	.init_firmware_loader = gaudi2_init_firmware_loader,
10519 	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
10520 	.state_dump_init = gaudi2_state_dump_init,
10521 	.get_sob_addr = gaudi2_get_sob_addr,
10522 	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
10523 	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
10524 	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
10525 	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
10526 	.access_dev_mem = hl_access_dev_mem,
10527 	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
10528 	.set_engine_cores = gaudi2_set_engine_cores,
10529 	.send_device_activity = gaudi2_send_device_activity,
10530 	.set_dram_properties = gaudi2_set_dram_properties,
10531 	.set_binning_masks = gaudi2_set_binning_masks,
10532 };
10533 
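/* Plug the Gaudi2 ASIC function table into the device structure */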
10534 void gaudi2_set_asic_funcs(struct hl_device *hdev)
10535 {
10536 	hdev->asic_funcs = &gaudi2_funcs;
10537 }
10538