xref: /openbmc/linux/drivers/accel/habanalabs/gaudi2/gaudi2.c (revision 54fcb384be6044ef17ec95a0aec0f86ad114b8d6)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2020-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/hw_ip/mmu/mmu_general.h"
11 #include "../include/hw_ip/mmu/mmu_v2_0.h"
12 #include "../include/gaudi2/gaudi2_packets.h"
13 #include "../include/gaudi2/gaudi2_reg_map.h"
14 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
15 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
16 
17 #include <linux/module.h>
18 #include <linux/pci.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 
22 #define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */
23 
24 #define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
25 #define GAUDI2_RESET_POLL_TIMEOUT_USEC		50000		/* 50ms */
26 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
27 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
28 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
29 #define GAUDI2_RESET_POLL_CNT			3
30 #define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
31 #define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
32 #define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
33 #define GAUDI2_CB_POOL_CB_CNT			512
34 #define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
35 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
36 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
37 #define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
38 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
39 
40 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3
41 
42 /*
43  * since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs
44  * and the code relies on that value (for array size etc..) we define another value
45  * for MAX faulty TPCs which reflects the cluster binning requirements
46  */
47 #define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
48 #define MAX_FAULTY_XBARS			1
49 #define MAX_FAULTY_EDMAS			1
50 #define MAX_FAULTY_DECODERS			1
51 
52 #define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
53 #define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
54 #define GAUDI2_DECODER_FULL_MASK		0x3FF
55 
56 #define GAUDI2_NA_EVENT_CAUSE			0xFF
57 #define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
58 #define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
59 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
60 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
61 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
62 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
63 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
64 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE		30
65 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
66 #define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
67 #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
68 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
69 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
70 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
71 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
72 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
73 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
74 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
75 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
76 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
77 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5
78 
79 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
80 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
81 #define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)
82 
83 #define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
84 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)
85 
86 #define KDMA_TIMEOUT_USEC			USEC_PER_SEC
87 
88 #define IS_DMA_IDLE(dma_core_idle_ind_mask)	\
89 	(!((dma_core_idle_ind_mask) &		\
90 	((DCORE0_EDMA0_CORE_IDLE_IND_MASK_DESC_CNT_STS_MASK) | \
91 	(DCORE0_EDMA0_CORE_IDLE_IND_MASK_COMP_MASK))))
92 
93 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
94 
95 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))
96 
97 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
98 	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
99 	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
100 	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))
101 
102 #define PCIE_DEC_EN_MASK			0x300
103 #define DEC_WORK_STATE_IDLE			0
104 #define DEC_WORK_STATE_PEND			3
105 #define IS_DEC_IDLE(dec_swreg15) \
106 	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
107 	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) ==  DEC_WORK_STATE_PEND)
108 
109 /* HBM MMU address scrambling parameters */
110 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
111 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
112 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
113 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
114 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
115 #define MMU_RANGE_INV_VA_LSB_SHIFT		12
116 #define MMU_RANGE_INV_VA_MSB_SHIFT		44
117 #define MMU_RANGE_INV_EN_SHIFT			0
118 #define MMU_RANGE_INV_ASID_EN_SHIFT		1
119 #define MMU_RANGE_INV_ASID_SHIFT		2
120 
121 /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has
122  * a 2 entries FIFO, and hence it is not enabled for it.
123  */
124 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
125 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
126 
127 #define GAUDI2_MAX_STRING_LEN			64
128 
129 #define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
130 							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
131 
132 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
133 
134 enum hl_pmmu_fatal_cause {
135 	LATENCY_RD_OUT_FIFO_OVERRUN,
136 	LATENCY_WR_OUT_FIFO_OVERRUN,
137 };
138 
139 enum hl_pcie_drain_ind_cause {
140 	LBW_AXI_DRAIN_IND,
141 	HBW_AXI_DRAIN_IND
142 };
143 
144 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
145 	[HBM_ID0] = 0xFFFC,
146 	[HBM_ID1] = 0xFFCF,
147 	[HBM_ID2] = 0xF7F7,
148 	[HBM_ID3] = 0x7F7F,
149 	[HBM_ID4] = 0xFCFF,
150 	[HBM_ID5] = 0xCFFF,
151 };
152 
153 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
154 	[0] = HBM_ID0,
155 	[1] = HBM_ID1,
156 	[2] = HBM_ID4,
157 	[3] = HBM_ID5,
158 };
159 
160 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
161 	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
162 	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
163 	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
164 	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
165 	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
166 	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
167 	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
168 	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
169 };
170 
171 static const int gaudi2_qman_async_event_id[] = {
172 	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
173 	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
174 	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
175 	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
176 	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
177 	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
178 	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
179 	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
180 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
181 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
182 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
183 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
184 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
185 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
186 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
187 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
188 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
189 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
190 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
191 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
192 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
193 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
194 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
195 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
196 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
197 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
198 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
199 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
200 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
201 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
202 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
203 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
204 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
205 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
206 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
207 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
208 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
209 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
210 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
211 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
212 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
213 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
214 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
215 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
216 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
217 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
218 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
219 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
220 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
221 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
222 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
223 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
224 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
225 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
226 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
227 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
228 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
229 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
230 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
231 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
232 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
233 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
234 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
235 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
236 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
237 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
238 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
239 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
240 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
241 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
242 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
243 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
244 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
245 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
246 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
247 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
248 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
249 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
250 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
251 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
252 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
253 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
254 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
255 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
256 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
257 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
258 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
259 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
260 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
261 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
262 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
263 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
264 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
265 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
266 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
267 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
268 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
269 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
270 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
271 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
272 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
273 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
274 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
275 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
276 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
277 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
278 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
279 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
280 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
281 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
282 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
283 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
284 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
285 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
286 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
287 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
288 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
289 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
290 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
291 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
292 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
293 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
294 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
295 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
296 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
297 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
298 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
299 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
300 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
301 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
302 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
303 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
304 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
305 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
306 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
307 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
308 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
309 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
310 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
311 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
312 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
313 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
314 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
315 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
316 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
317 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
318 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
319 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
320 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
321 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
322 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
323 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
324 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
325 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
326 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
327 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
328 	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
329 	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
330 	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
331 	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
332 	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
333 	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
334 	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
335 	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
336 	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
337 	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
338 	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
339 	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
340 	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
341 	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
342 	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
343 	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
344 	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
345 	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
346 	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
347 	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
348 	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
349 	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
350 	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
351 	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
352 	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
353 	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
354 	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
355 	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
356 	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
357 	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
358 	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
359 	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
360 	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
361 	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
362 	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
363 	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
364 	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
365 	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
366 	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
367 	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
368 	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
369 	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
370 	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
371 	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
372 	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
373 	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
374 	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
375 	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
376 	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
377 	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
378 	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
379 	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
380 	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
381 	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
382 	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
383 	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
384 	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
385 	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
386 	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
387 	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
388 	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
389 	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
390 	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
391 	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
392 	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
393 	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
394 	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
395 	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
396 	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
397 	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
398 	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
399 	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
400 	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
401 	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
402 	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
403 	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
404 	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
405 	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
406 	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
407 	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
408 	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
409 	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
410 	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
411 	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
412 	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
413 	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
414 	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
415 	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
416 	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
417 	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
418 	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
419 	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
420 	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
421 	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
422 	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
423 	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
424 	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
425 	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
426 	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
427 	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
428 	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
429 	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
430 	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
431 	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
432 };
433 
434 static const int gaudi2_dma_core_async_event_id[] = {
435 	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
436 	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
437 	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
438 	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
439 	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
440 	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
441 	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
442 	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
443 	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
444 	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
445 	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
446 };
447 
448 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
449 	"qman sei intr",
450 	"arc sei intr"
451 };
452 
453 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
454 	"AXI_TERMINATOR WR",
455 	"AXI_TERMINATOR RD",
456 	"AXI SPLIT SEI Status"
457 };
458 
459 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
460 	"cbu_bresp_sei_intr_cause",
461 	"cbu_rresp_sei_intr_cause",
462 	"lbu_bresp_sei_intr_cause",
463 	"lbu_rresp_sei_intr_cause",
464 	"cbu_axi_split_intr_cause",
465 	"lbu_axi_split_intr_cause",
466 	"arc_ip_excptn_sei_intr_cause",
467 	"dmi_bresp_sei_intr_cause",
468 	"aux2apb_err_sei_intr_cause",
469 	"cfg_lbw_wr_terminated_intr_cause",
470 	"cfg_lbw_rd_terminated_intr_cause",
471 	"cfg_dccm_wr_terminated_intr_cause",
472 	"cfg_dccm_rd_terminated_intr_cause",
473 	"cfg_hbw_rd_terminated_intr_cause"
474 };
475 
476 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
477 	"msix_vcd_hbw_sei",
478 	"msix_l2c_hbw_sei",
479 	"msix_nrm_hbw_sei",
480 	"msix_abnrm_hbw_sei",
481 	"msix_vcd_lbw_sei",
482 	"msix_l2c_lbw_sei",
483 	"msix_nrm_lbw_sei",
484 	"msix_abnrm_lbw_sei",
485 	"apb_vcd_lbw_sei",
486 	"apb_l2c_lbw_sei",
487 	"apb_nrm_lbw_sei",
488 	"apb_abnrm_lbw_sei",
489 	"dec_sei",
490 	"dec_apb_sei",
491 	"trc_apb_sei",
492 	"lbw_mstr_if_sei",
493 	"axi_split_bresp_err_sei",
494 	"hbw_axi_wr_viol_sei",
495 	"hbw_axi_rd_viol_sei",
496 	"lbw_axi_wr_viol_sei",
497 	"lbw_axi_rd_viol_sei",
498 	"vcd_spi",
499 	"l2c_spi",
500 	"nrm_spi",
501 	"abnrm_spi",
502 };
503 
504 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
505 	"PQ AXI HBW error",
506 	"CQ AXI HBW error",
507 	"CP AXI HBW error",
508 	"CP error due to undefined OPCODE",
509 	"CP encountered STOP OPCODE",
510 	"CP AXI LBW error",
511 	"CP WRREG32 or WRBULK returned error",
512 	"N/A",
513 	"FENCE 0 inc over max value and clipped",
514 	"FENCE 1 inc over max value and clipped",
515 	"FENCE 2 inc over max value and clipped",
516 	"FENCE 3 inc over max value and clipped",
517 	"FENCE 0 dec under min value and clipped",
518 	"FENCE 1 dec under min value and clipped",
519 	"FENCE 2 dec under min value and clipped",
520 	"FENCE 3 dec under min value and clipped",
521 	"CPDMA Up overflow",
522 	"PQC L2H error"
523 };
524 
525 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
526 	"RSVD0",
527 	"CQ AXI HBW error",
528 	"CP AXI HBW error",
529 	"CP error due to undefined OPCODE",
530 	"CP encountered STOP OPCODE",
531 	"CP AXI LBW error",
532 	"CP WRREG32 or WRBULK returned error",
533 	"N/A",
534 	"FENCE 0 inc over max value and clipped",
535 	"FENCE 1 inc over max value and clipped",
536 	"FENCE 2 inc over max value and clipped",
537 	"FENCE 3 inc over max value and clipped",
538 	"FENCE 0 dec under min value and clipped",
539 	"FENCE 1 dec under min value and clipped",
540 	"FENCE 2 dec under min value and clipped",
541 	"FENCE 3 dec under min value and clipped",
542 	"CPDMA Up overflow",
543 	"RSVD17",
544 	"CQ_WR_IFIFO_CI_ERR",
545 	"CQ_WR_CTL_CI_ERR",
546 	"ARC_CQF_RD_ERR",
547 	"ARC_CQ_WR_IFIFO_CI_ERR",
548 	"ARC_CQ_WR_CTL_CI_ERR",
549 	"ARC_AXI_ERR",
550 	"CP_SWITCH_WDT_ERR"
551 };
552 
553 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
554 	"Choice push while full error",
555 	"Choice Q watchdog error",
556 	"MSG AXI LBW returned with error"
557 };
558 
559 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
560 	"qm_axi_err",
561 	"qm_trace_fence_events",
562 	"qm_sw_err",
563 	"qm_cp_sw_stop",
564 	"lbw_mstr_rresp_err",
565 	"lbw_mstr_bresp_err",
566 	"lbw_msg_slverr",
567 	"hbw_msg_slverr",
568 	"wbc_slverr",
569 	"hbw_mstr_rresp_err",
570 	"hbw_mstr_bresp_err",
571 	"sb_resp_intr",
572 	"mrsb_resp_intr",
573 	"core_dw_status_0",
574 	"core_dw_status_1",
575 	"core_dw_status_2",
576 	"core_dw_status_3",
577 	"core_dw_status_4",
578 	"core_dw_status_5",
579 	"core_dw_status_6",
580 	"core_dw_status_7",
581 	"async_arc2cpu_sei_intr",
582 };
583 
584 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
585 	"tpc_address_exceed_slm",
586 	"tpc_div_by_0",
587 	"tpc_spu_mac_overflow",
588 	"tpc_spu_addsub_overflow",
589 	"tpc_spu_abs_overflow",
590 	"tpc_spu_fma_fp_dst_nan",
591 	"tpc_spu_fma_fp_dst_inf",
592 	"tpc_spu_convert_fp_dst_nan",
593 	"tpc_spu_convert_fp_dst_inf",
594 	"tpc_spu_fp_dst_denorm",
595 	"tpc_vpu_mac_overflow",
596 	"tpc_vpu_addsub_overflow",
597 	"tpc_vpu_abs_overflow",
598 	"tpc_vpu_convert_fp_dst_nan",
599 	"tpc_vpu_convert_fp_dst_inf",
600 	"tpc_vpu_fma_fp_dst_nan",
601 	"tpc_vpu_fma_fp_dst_inf",
602 	"tpc_vpu_fp_dst_denorm",
603 	"tpc_assertions",
604 	"tpc_illegal_instruction",
605 	"tpc_pc_wrap_around",
606 	"tpc_qm_sw_err",
607 	"tpc_hbw_rresp_err",
608 	"tpc_hbw_bresp_err",
609 	"tpc_lbw_rresp_err",
610 	"tpc_lbw_bresp_err",
611 	"st_unlock_already_locked",
612 	"invalid_lock_access",
613 	"LD_L protection violation",
614 	"ST_L protection violation",
615 };
616 
617 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
618 	"agu_resp_intr",
619 	"qman_axi_err",
620 	"wap sei (wbc axi err)",
621 	"arc sei",
622 	"cfg access error",
623 	"qm_sw_err",
624 	"sbte_dbg_intr_0",
625 	"sbte_dbg_intr_1",
626 	"sbte_dbg_intr_2",
627 	"sbte_dbg_intr_3",
628 	"sbte_dbg_intr_4",
629 	"sbte_prtn_intr_0",
630 	"sbte_prtn_intr_1",
631 	"sbte_prtn_intr_2",
632 	"sbte_prtn_intr_3",
633 	"sbte_prtn_intr_4",
634 };
635 
636 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
637 	"i0",
638 	"i1",
639 	"i2",
640 	"i3",
641 	"i4",
642 };
643 
644 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
645 	"WBC ERR RESP_0",
646 	"WBC ERR RESP_1",
647 	"AP SOURCE POS INF",
648 	"AP SOURCE NEG INF",
649 	"AP SOURCE NAN",
650 	"AP RESULT POS INF",
651 	"AP RESULT NEG INF",
652 };
653 
654 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
655 	"HBW Read returned with error RRESP",
656 	"HBW write returned with error BRESP",
657 	"LBW write returned with error BRESP",
658 	"descriptor_fifo_overflow",
659 	"KDMA SB LBW Read returned with error",
660 	"KDMA WBC LBW Write returned with error",
661 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
662 	"WRONG CFG FOR COMMIT IN LIN DMA"
663 };
664 
665 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
666 	"HBW/LBW Read returned with error RRESP",
667 	"HBW/LBW write returned with error BRESP",
668 	"LBW write returned with error BRESP",
669 	"descriptor_fifo_overflow",
670 	"KDMA SB LBW Read returned with error",
671 	"KDMA WBC LBW Write returned with error",
672 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
673 	"WRONG CFG FOR COMMIT IN LIN DMA"
674 };
675 
676 struct gaudi2_sm_sei_cause_data {
677 	const char *cause_name;
678 	const char *log_name;
679 };
680 
681 static const struct gaudi2_sm_sei_cause_data
682 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
683 	{"calculated SO value overflow/underflow", "SOB ID"},
684 	{"payload address of monitor is not aligned to 4B", "monitor addr"},
685 	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
686 };
687 
688 static const char * const
689 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
690 	"LATENCY_RD_OUT_FIFO_OVERRUN",
691 	"LATENCY_WR_OUT_FIFO_OVERRUN",
692 };
693 
694 static const char * const
695 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
696 	"LATENCY_RD_OUT_FIFO_OVERRUN",
697 	"LATENCY_WR_OUT_FIFO_OVERRUN",
698 };
699 
700 static const char * const
701 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
702 	"AXI drain HBW",
703 	"AXI drain LBW",
704 };
705 
706 static const char * const
707 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
708 	"HBW error response",
709 	"LBW error response",
710 	"TLP is blocked by RR"
711 };
712 
713 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
714 	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
715 	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
716 	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
717 	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
718 	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
719 	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
720 	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
721 	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
722 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
723 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
724 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
725 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
726 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
727 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
728 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
729 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
730 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
731 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
732 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
733 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
734 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
735 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
736 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
737 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
738 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
739 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
740 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
741 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
742 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
743 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
744 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
745 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
746 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
747 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
748 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
749 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
750 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
751 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
752 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
753 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
754 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
755 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
756 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
757 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
758 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
759 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
760 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
761 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
762 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
763 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
764 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
765 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
766 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
767 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
768 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
769 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
770 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
771 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
772 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
773 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
774 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
775 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
776 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
777 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
778 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
779 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
780 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
781 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
782 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
783 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
784 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
785 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
786 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
787 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
788 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
789 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
790 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
791 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
792 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
793 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
794 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
795 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
796 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
797 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
798 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
799 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
800 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
801 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
802 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
803 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
804 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
805 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
806 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
807 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
808 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
809 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
810 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
811 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
812 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
813 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
814 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
815 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
816 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
817 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
818 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
819 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
820 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
821 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
822 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
823 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
824 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
825 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
826 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
827 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
828 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
829 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
830 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
831 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
832 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
833 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
834 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
835 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
836 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
837 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
838 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
839 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
840 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
841 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
842 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
843 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
844 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
845 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
846 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
847 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
848 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
849 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
850 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
851 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
852 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
853 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
854 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
855 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
856 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
857 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
858 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
859 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
860 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
861 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
862 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
863 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
864 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
865 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
866 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
867 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
868 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
869 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
870 	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
871 	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
872 	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
873 	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
874 	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
875 	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
876 	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
877 	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
878 	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
879 	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
880 	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
881 	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
882 	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
883 	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
884 	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
885 	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
886 	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
887 	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
888 	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
889 	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
890 	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
891 	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
892 	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
893 	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
894 	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
895 	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
896 	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
897 	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
898 	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
899 	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
900 	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
901 	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
902 	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
903 	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
904 	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
905 	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
906 	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
907 	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
908 	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
909 	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
910 	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
911 	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
912 	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
913 	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
914 	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
915 	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
916 	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
917 	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
918 	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
919 	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
920 	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
921 	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
922 	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
923 	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
924 	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
925 	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
926 	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
927 	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
928 	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
929 	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
930 	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
931 	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
932 	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
933 	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
934 	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
935 	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
936 	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
937 	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
938 	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
939 	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
940 	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
941 	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
942 	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
943 	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
944 	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
945 	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
946 	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
947 	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
948 	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
949 	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
950 	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
951 	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
952 	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
953 	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
954 	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
955 	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
956 	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
957 	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
958 	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
959 	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
960 	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
961 	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
962 	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
963 	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
964 	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
965 	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
966 	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
967 	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
968 	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
969 	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
970 	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
971 	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
972 	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
973 	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
974 };
975 
976 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
977 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
978 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
979 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
980 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
981 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
982 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
983 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
984 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
985 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
986 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
987 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
988 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
989 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
990 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
991 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
992 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
993 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
994 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
995 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
996 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
997 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
998 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
999 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1000 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1001 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1002 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1003 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1004 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1005 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1006 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1007 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1008 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1009 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1010 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1011 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1012 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1013 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1014 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1015 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1016 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1017 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1018 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1019 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1020 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1021 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1022 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1023 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1024 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1025 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1026 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1027 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1028 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1029 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1030 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1031 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1032 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1033 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1034 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1035 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1036 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1037 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1038 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1039 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1040 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1041 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1042 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1043 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1044 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1045 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1046 };
1047 
1048 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1049 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1050 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1051 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1052 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1053 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1054 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1055 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1056 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1057 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1058 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1059 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1060 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1061 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1062 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1063 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1064 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1065 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1066 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1067 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1068 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1069 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1070 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1071 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1072 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1073 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1074 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1075 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1076 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1077 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1078 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1079 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1080 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1081 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1082 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1083 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1084 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1085 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1086 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1087 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1088 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1089 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1090 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1091 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1092 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1093 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1094 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1095 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1096 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1097 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1098 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1099 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1100 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1101 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1102 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1103 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1104 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1105 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1106 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1107 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1108 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1109 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1110 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1111 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1112 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1113 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1114 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1115 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1116 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1117 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1118 };
1119 
1120 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1121 	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1122 	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1123 	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1124 	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1125 };
1126 
1127 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1128 	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1129 	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1130 	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1131 	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1132 	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1133 	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1134 	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1135 	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1136 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1137 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1138 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1139 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1140 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1141 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1142 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1143 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1144 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1145 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1146 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1147 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1148 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1149 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1150 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1151 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1152 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1153 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1154 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1155 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1156 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1157 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1158 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1159 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1160 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1161 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1162 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1163 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1164 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1165 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1166 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1167 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1168 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1169 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1170 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1171 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1172 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1173 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1174 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1175 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1176 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1177 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1178 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1179 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1180 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1181 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1182 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1183 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1184 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1185 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1186 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1187 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1188 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1189 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1190 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1191 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1192 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1193 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1194 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1195 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1196 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1197 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1198 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1199 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1200 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1201 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1202 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1203 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1204 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1205 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1206 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1207 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1208 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1209 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1210 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1211 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1212 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1213 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1214 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1215 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1216 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1217 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1218 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1219 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1220 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1221 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1222 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1223 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1224 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1225 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1226 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1227 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1228 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1229 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1230 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1231 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1232 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1233 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1234 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1235 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1236 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1237 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1238 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1239 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1240 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1241 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1242 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1243 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1244 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1245 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1246 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1247 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1248 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1249 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1250 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1251 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1252 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1253 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1254 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1255 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1256 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1257 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1258 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1259 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1260 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1261 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1262 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1263 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1264 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1265 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1266 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1267 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1268 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1269 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1270 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1271 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1272 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1273 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1274 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1275 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1276 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1277 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1278 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1279 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1280 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1281 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1282 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1283 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1284 	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1285 	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1286 	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1287 	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1288 	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1289 	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1290 	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1291 	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1292 	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1293 	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1294 	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1295 	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1296 	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1297 	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1298 	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1299 	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1300 	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1301 	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1302 	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1303 	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1304 	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1305 	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1306 	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1307 	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1308 	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1309 	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1310 	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1311 	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1312 	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1313 	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1314 	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1315 	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1316 	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1317 	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1318 	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1319 	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1320 	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1321 	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1322 	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1323 	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1324 	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1325 	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1326 	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1327 	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1328 	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1329 	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1330 	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1331 	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1332 	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1333 	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1334 	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1335 	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1336 	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1337 	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1338 	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1339 	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1340 	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1341 	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1342 	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1343 	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1344 	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1345 	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1346 	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1347 	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1348 	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1349 	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1350 	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1351 	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1352 	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1353 	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1354 	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1355 	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1356 	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1357 	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1358 	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1359 	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1360 	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1361 	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1362 	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1363 	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1364 	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1365 	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1366 	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1367 	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1368 	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1369 	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1370 	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1371 	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1372 	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1373 	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1374 	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1375 	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1376 	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1377 	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1378 	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1379 	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1380 	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1381 	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1382 	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1383 	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1384 	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1385 	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1386 	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1387 	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1388 };
1389 
1390 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1391 	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1392 	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1393 	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1394 	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1395 	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1396 	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1397 	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1398 	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1399 	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1400 	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1401 	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1402 };
1403 
1404 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1405 	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1406 	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1407 	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1408 	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1409 };
1410 
1411 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1412 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1413 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1414 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1415 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1416 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1417 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1418 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1419 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1420 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1421 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1422 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1423 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1424 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1425 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1426 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1427 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1428 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1429 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1430 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1431 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1432 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1433 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1434 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1435 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1436 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1437 };
1438 
1439 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1440 	[ROTATOR_ID_0] = mmROT0_BASE,
1441 	[ROTATOR_ID_1] = mmROT1_BASE
1442 };
1443 
1444 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1445 	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1446 	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1447 	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1448 	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1449 	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1450 	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1451 	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1452 	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1453 	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1454 	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1455 	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1456 	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1457 	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1458 	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1459 	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1460 	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1461 	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1462 	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1463 	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1464 	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1465 	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1466 	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1467 	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1468 	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1469 	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1470 };
1471 
1472 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1473 	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1474 	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1475 };
1476 
1477 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1478 	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1479 	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1480 	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1481 	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1482 	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1483 	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1484 	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1485 	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1486 };
1487 
1488 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1489 	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1490 	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1491 	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1492 	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1493 	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1494 	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1495 	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1496 	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1497 	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1498 	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1499 };
1500 
1501 static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] = {
1502 	RTR_ID_X_Y(2, 4),
1503 	RTR_ID_X_Y(3, 4),
1504 	RTR_ID_X_Y(4, 4),
1505 	RTR_ID_X_Y(5, 4),
1506 	RTR_ID_X_Y(6, 4),
1507 	RTR_ID_X_Y(7, 4),
1508 	RTR_ID_X_Y(8, 4),
1509 	RTR_ID_X_Y(9, 4),
1510 	RTR_ID_X_Y(10, 4),
1511 	RTR_ID_X_Y(11, 4),
1512 	RTR_ID_X_Y(12, 4),
1513 	RTR_ID_X_Y(13, 4),
1514 	RTR_ID_X_Y(14, 4),
1515 	RTR_ID_X_Y(15, 4),
1516 	RTR_ID_X_Y(16, 4),
1517 	RTR_ID_X_Y(17, 4),
1518 	RTR_ID_X_Y(2, 11),
1519 	RTR_ID_X_Y(3, 11),
1520 	RTR_ID_X_Y(4, 11),
1521 	RTR_ID_X_Y(5, 11),
1522 	RTR_ID_X_Y(6, 11),
1523 	RTR_ID_X_Y(7, 11),
1524 	RTR_ID_X_Y(8, 11),
1525 	RTR_ID_X_Y(9, 11),
1526 	RTR_ID_X_Y(0, 0),/* 24 no id */
1527 	RTR_ID_X_Y(0, 0),/* 25 no id */
1528 	RTR_ID_X_Y(0, 0),/* 26 no id */
1529 	RTR_ID_X_Y(0, 0),/* 27 no id */
1530 	RTR_ID_X_Y(14, 11),
1531 	RTR_ID_X_Y(15, 11),
1532 	RTR_ID_X_Y(16, 11),
1533 	RTR_ID_X_Y(17, 11)
1534 };
1535 
1536 enum rtr_id {
1537 	DCORE0_RTR0,
1538 	DCORE0_RTR1,
1539 	DCORE0_RTR2,
1540 	DCORE0_RTR3,
1541 	DCORE0_RTR4,
1542 	DCORE0_RTR5,
1543 	DCORE0_RTR6,
1544 	DCORE0_RTR7,
1545 	DCORE1_RTR0,
1546 	DCORE1_RTR1,
1547 	DCORE1_RTR2,
1548 	DCORE1_RTR3,
1549 	DCORE1_RTR4,
1550 	DCORE1_RTR5,
1551 	DCORE1_RTR6,
1552 	DCORE1_RTR7,
1553 	DCORE2_RTR0,
1554 	DCORE2_RTR1,
1555 	DCORE2_RTR2,
1556 	DCORE2_RTR3,
1557 	DCORE2_RTR4,
1558 	DCORE2_RTR5,
1559 	DCORE2_RTR6,
1560 	DCORE2_RTR7,
1561 	DCORE3_RTR0,
1562 	DCORE3_RTR1,
1563 	DCORE3_RTR2,
1564 	DCORE3_RTR3,
1565 	DCORE3_RTR4,
1566 	DCORE3_RTR5,
1567 	DCORE3_RTR6,
1568 	DCORE3_RTR7,
1569 };
1570 
1571 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1572 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1573 	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1574 	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1575 	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1576 	DCORE0_RTR0
1577 };
1578 
1579 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = {
1580 	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1581 	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1582 };
1583 
1584 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = {
1585 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1586 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1587 };
1588 
1589 struct sft_info {
1590 	u8 interface_id;
1591 	u8 dcore_id;
1592 };
1593 
1594 static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1595 	{0, 0},	{1, 0}, {0, 1}, {1, 1}, {1, 2}, {1, 3},	{0, 2},	{0, 3},
1596 };
1597 
1598 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = {
1599 	DCORE0_RTR0, DCORE0_RTR0
1600 };
1601 
1602 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = {
1603 	DCORE2_RTR0, DCORE3_RTR7
1604 };
1605 
1606 struct mme_initiators_rtr_id {
1607 	u32 wap0;
1608 	u32 wap1;
1609 	u32 write;
1610 	u32 read;
1611 	u32 sbte0;
1612 	u32 sbte1;
1613 	u32 sbte2;
1614 	u32 sbte3;
1615 	u32 sbte4;
1616 };
1617 
1618 enum mme_initiators {
1619 	MME_WAP0 = 0,
1620 	MME_WAP1,
1621 	MME_WRITE,
1622 	MME_READ,
1623 	MME_SBTE0,
1624 	MME_SBTE1,
1625 	MME_SBTE2,
1626 	MME_SBTE3,
1627 	MME_SBTE4,
1628 	MME_INITIATORS_MAX
1629 };
1630 
1631 static const struct mme_initiators_rtr_id
1632 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1633 	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1634 	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1635 	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1636 	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1637 	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1638 	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1639 	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1640 	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1641 };
1642 
1643 enum razwi_event_sources {
1644 	RAZWI_TPC,
1645 	RAZWI_MME,
1646 	RAZWI_EDMA,
1647 	RAZWI_PDMA,
1648 	RAZWI_NIC,
1649 	RAZWI_DEC,
1650 	RAZWI_ROT
1651 };
1652 
1653 struct hbm_mc_error_causes {
1654 	u32 mask;
1655 	char cause[50];
1656 };
1657 
1658 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
1659 	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
1660 	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
1661 	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
1662 	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
1663 	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
1664 };
1665 
1666 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
1667 	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
1668 	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
1669 	[HBM_SEI_READ_ERR] = "SEI read data error",
1670 	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
1671 	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
1672 	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
1673 	[HBM_SEI_DFI] = "SEI DFI error",
1674 	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
1675 	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
1676 };
1677 
1678 struct mmu_spi_sei_cause {
1679 	char cause[50];
1680 	int clear_bit;
1681 };
1682 
1683 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
1684 	{"page fault", 1},		/* INTERRUPT_CLR[1] */
1685 	{"page access", 1},		/* INTERRUPT_CLR[1] */
1686 	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
1687 	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
1688 	{"mmu rei0", -1},		/* no clear register bit */
1689 	{"mmu rei1", -1},		/* no clear register bit */
1690 	{"stlb rei0", -1},		/* no clear register bit */
1691 	{"stlb rei1", -1},		/* no clear register bit */
1692 	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
1693 	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
1694 	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
1695 	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
1696 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1697 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1698 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1699 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
1700 	{"slave error", 16},		/* INTERRUPT_CLR[16] */
1701 	{"dec error", 17},		/* INTERRUPT_CLR[17] */
1702 	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
1703 };
1704 
1705 struct gaudi2_cache_invld_params {
1706 	u64 start_va;
1707 	u64 end_va;
1708 	u32 inv_start_val;
1709 	u32 flags;
1710 	bool range_invalidation;
1711 };
1712 
1713 struct gaudi2_tpc_idle_data {
1714 	struct engines_data *e;
1715 	unsigned long *mask;
1716 	bool *is_idle;
1717 	const char *tpc_fmt;
1718 };
1719 
1720 struct gaudi2_tpc_mmu_data {
1721 	u32 rw_asid;
1722 };
1723 
1724 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
1725 
1726 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
1727 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
1728 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
1729 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1730 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
1731 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
1732 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
1733 										bool is_memset);
1734 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
1735 
1736 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
1737 {
1738 
1739 }
1740 
1741 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
1742 {
1743 	return sizeof(struct packet_msg_short);
1744 }
1745 
1746 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
1747 {
1748 	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
1749 }
1750 
1751 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
1752 {
1753 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1754 	int dcore, inst, tpc_seq;
1755 	u32 offset;
1756 
1757 	/* init the return code */
1758 	ctx->rc = 0;
1759 
1760 	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
1761 		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
1762 			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
1763 
1764 			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
1765 				continue;
1766 
1767 			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
1768 
1769 			ctx->fn(hdev, dcore, inst, offset, ctx);
1770 			if (ctx->rc) {
1771 				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
1772 							dcore, inst);
1773 				return;
1774 			}
1775 		}
1776 	}
1777 
1778 	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
1779 		return;
1780 
1781 	/* special check for PCI TPC (DCORE0_TPC6) */
1782 	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
1783 	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
1784 	if (ctx->rc)
1785 		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
1786 }
1787 
1788 static bool gaudi2_host_phys_addr_valid(u64 addr)
1789 {
1790 	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
1791 		return true;
1792 
1793 	return false;
1794 }
1795 
1796 static int set_number_of_functional_hbms(struct hl_device *hdev)
1797 {
1798 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1799 	u8 faulty_hbms = hweight64(hdev->dram_binning);
1800 
1801 	/* check if all HBMs should be used */
1802 	if (!faulty_hbms) {
1803 		dev_dbg(hdev->dev, "All HBM are in use (no binning)\n");
1804 		prop->num_functional_hbms = GAUDI2_HBM_NUM;
1805 		return 0;
1806 	}
1807 
1808 	/*
1809 	 * check for error condition in which number of binning
1810 	 * candidates is higher than the maximum supported by the
1811 	 * driver (in which case binning mask shall be ignored and driver will
1812 	 * set the default)
1813 	 */
1814 	if (faulty_hbms > MAX_FAULTY_HBMS) {
1815 		dev_err(hdev->dev,
1816 			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
1817 			MAX_FAULTY_HBMS, hdev->dram_binning);
1818 		return -EINVAL;
1819 	}
1820 
1821 	/*
1822 	 * by default, number of functional HBMs in Gaudi2 is always
1823 	 * GAUDI2_HBM_NUM - 1.
1824 	 */
1825 	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
1826 	return 0;
1827 }
1828 
1829 static int gaudi2_set_dram_properties(struct hl_device *hdev)
1830 {
1831 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1832 	u32 basic_hbm_page_size;
1833 	int rc;
1834 
1835 	rc = set_number_of_functional_hbms(hdev);
1836 	if (rc)
1837 		return -EINVAL;
1838 
1839 	/*
1840 	 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround
1841 	 * in which we are using x16 bigger page size to be able to populate the entire
1842 	 * HBM mappings in the TLB
1843 	 */
1844 	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
1845 	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
1846 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
1847 	prop->dram_size = prop->num_functional_hbms * SZ_16G;
1848 	prop->dram_base_address = DRAM_PHYS_BASE;
1849 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
1850 	prop->dram_supports_virtual_memory = true;
1851 
1852 	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
1853 	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
1854 	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
1855 	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
1856 
1857 	/* since DRAM page size differs from DMMU page size we need to allocate
1858 	 * DRAM memory in units of dram_page size and mapping this memory in
1859 	 * units of DMMU page size. we overcome this size mismatch using a
1860 	 * scrambling routine which takes a DRAM page and converts it to a DMMU
1861 	 * page.
1862 	 * We therefore:
1863 	 * 1. partition the virtual address space to DRAM-page (whole) pages.
1864 	 *    (suppose we get n such pages)
1865 	 * 2. limit the amount of virtual address space we got from 1 above to
1866 	 *    a multiple of 64M as we don't want the scrambled address to cross
1867 	 *    the DRAM virtual address space.
1868 	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
1869 	 * 3. determine the and address accordingly
1870 	 *    end_addr = start_addr + m * 48M
1871 	 *
1872 	 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
1873 	 */
1874 	prop->dmmu.start_addr = prop->dram_base_address +
1875 			(prop->dram_page_size *
1876 				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
1877 
1878 	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
1879 			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
1880 
1881 	return 0;
1882 }
1883 
1884 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
1885 {
1886 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1887 	struct hw_queue_properties *q_props;
1888 	u32 num_sync_stream_queues = 0;
1889 	int i;
1890 
1891 	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
1892 	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
1893 					GFP_KERNEL);
1894 
1895 	if (!prop->hw_queues_props)
1896 		return -ENOMEM;
1897 
1898 	q_props = prop->hw_queues_props;
1899 
1900 	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
1901 		q_props[i].type = QUEUE_TYPE_HW;
1902 		q_props[i].driver_only = 0;
1903 
1904 		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
1905 			q_props[i].supports_sync_stream = 0;
1906 		} else {
1907 			q_props[i].supports_sync_stream = 1;
1908 			num_sync_stream_queues++;
1909 		}
1910 
1911 		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
1912 	}
1913 
1914 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
1915 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
1916 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
1917 
1918 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
1919 	prop->cfg_base_address = CFG_BASE;
1920 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
1921 	prop->host_base_address = HOST_PHYS_BASE_0;
1922 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
1923 	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
1924 	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
1925 	prop->user_dec_intr_count = NUMBER_OF_DEC;
1926 	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
1927 	prop->completion_mode = HL_COMPLETION_MODE_CS;
1928 	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
1929 	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
1930 
1931 	prop->sram_base_address = SRAM_BASE_ADDR;
1932 	prop->sram_size = SRAM_SIZE;
1933 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
1934 	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
1935 
1936 	prop->hints_range_reservation = true;
1937 
1938 	if (hdev->pldm)
1939 		prop->mmu_pgt_size = 0x800000; /* 8MB */
1940 	else
1941 		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
1942 
1943 	prop->mmu_pte_size = HL_PTE_SIZE;
1944 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
1945 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
1946 
1947 	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
1948 	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
1949 	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
1950 	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
1951 	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
1952 	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
1953 	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
1954 	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
1955 	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
1956 	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
1957 	prop->dmmu.page_size = PAGE_SIZE_1GB;
1958 	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
1959 	prop->dmmu.last_mask = LAST_MASK;
1960 	prop->dmmu.host_resident = 1;
1961 	/* TODO: will be duplicated until implementing per-MMU props */
1962 	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
1963 	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1964 
1965 	/*
1966 	 * this is done in order to be able to validate FW descriptor (i.e. validating that
1967 	 * the addresses and allocated space for FW image does not cross memory bounds).
1968 	 * for this reason we set the DRAM size to the minimum possible and later it will
1969 	 * be modified according to what reported in the cpucp info packet
1970 	 */
1971 	prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
1972 
1973 	hdev->pmmu_huge_range = true;
1974 	prop->pmmu.host_resident = 1;
1975 	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
1976 	prop->pmmu.last_mask = LAST_MASK;
1977 	/* TODO: will be duplicated until implementing per-MMU props */
1978 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
1979 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
1980 
1981 	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
1982 	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
1983 	prop->hints_host_hpage_reserved_va_range.start_addr =
1984 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
1985 	prop->hints_host_hpage_reserved_va_range.end_addr =
1986 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
1987 
1988 	if (PAGE_SIZE == SZ_64K) {
1989 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
1990 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
1991 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
1992 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
1993 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
1994 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
1995 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
1996 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
1997 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
1998 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
1999 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2000 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2001 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2002 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2003 		prop->pmmu.page_size = PAGE_SIZE_64KB;
2004 
2005 		/* shifts and masks are the same in PMMU and HPMMU */
2006 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2007 		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2008 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2009 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2010 	} else {
2011 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2012 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2013 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2014 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2015 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2016 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2017 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2018 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2019 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2020 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2021 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2022 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2023 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2024 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2025 		prop->pmmu.page_size = PAGE_SIZE_4KB;
2026 
2027 		/* shifts and masks are the same in PMMU and HPMMU */
2028 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2029 		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2030 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2031 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2032 	}
2033 
2034 	prop->num_engine_cores = CPU_ID_MAX;
2035 	prop->cfg_size = CFG_SIZE;
2036 	prop->max_asid = MAX_ASID;
2037 	prop->num_of_events = GAUDI2_EVENT_SIZE;
2038 
2039 	prop->dc_power_default = DC_POWER_DEFAULT;
2040 
2041 	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2042 	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2043 	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2044 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2045 
2046 	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2047 
2048 	prop->mme_master_slave_mode = 1;
2049 
2050 	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2051 					(num_sync_stream_queues * HL_RSVD_SOBS);
2052 
2053 	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2054 					(num_sync_stream_queues * HL_RSVD_MONS);
2055 
2056 	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2057 
2058 	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2059 
2060 	prop->fw_cpu_boot_dev_sts0_valid = false;
2061 	prop->fw_cpu_boot_dev_sts1_valid = false;
2062 	prop->hard_reset_done_by_fw = false;
2063 	prop->gic_interrupts_enable = true;
2064 
2065 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2066 
2067 	prop->max_dec = NUMBER_OF_DEC;
2068 
2069 	prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2070 
2071 	prop->dma_mask = 64;
2072 
2073 	prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2074 
2075 	return 0;
2076 }
2077 
2078 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2079 {
2080 	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2081 	bool is_wc[3] = {false, false, true};
2082 	int rc;
2083 
2084 	rc = hl_pci_bars_map(hdev, name, is_wc);
2085 	if (rc)
2086 		return rc;
2087 
2088 	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2089 
2090 	return 0;
2091 }
2092 
2093 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2094 {
2095 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2096 	struct hl_inbound_pci_region pci_region;
2097 	u64 old_addr = addr;
2098 	int rc;
2099 
2100 	if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2101 		return old_addr;
2102 
2103 	if (hdev->asic_prop.iatu_done_by_fw)
2104 		return U64_MAX;
2105 
2106 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2107 	pci_region.mode = PCI_BAR_MATCH_MODE;
2108 	pci_region.bar = DRAM_BAR_ID;
2109 	pci_region.addr = addr;
2110 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2111 	if (rc)
2112 		return U64_MAX;
2113 
2114 	if (gaudi2) {
2115 		old_addr = gaudi2->dram_bar_cur_addr;
2116 		gaudi2->dram_bar_cur_addr = addr;
2117 	}
2118 
2119 	return old_addr;
2120 }
2121 
2122 static int gaudi2_init_iatu(struct hl_device *hdev)
2123 {
2124 	struct hl_inbound_pci_region inbound_region;
2125 	struct hl_outbound_pci_region outbound_region;
2126 	u32 bar_addr_low, bar_addr_high;
2127 	int rc;
2128 
2129 	if (hdev->asic_prop.iatu_done_by_fw)
2130 		return 0;
2131 
2132 	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
2133 	 * We must map this region in BAR match mode in order to
2134 	 * fetch BAR physical base address
2135 	 */
2136 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2137 	inbound_region.bar = SRAM_CFG_BAR_ID;
2138 	/* Base address must be aligned to Bar size which is 256 MB */
2139 	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2140 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2141 	if (rc)
2142 		return rc;
2143 
2144 	/* Fetch physical BAR address */
2145 	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2146 	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2147 
2148 	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2149 
2150 	/* Inbound Region 0 - Bar 0 - Point to CFG */
2151 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2152 	inbound_region.bar = SRAM_CFG_BAR_ID;
2153 	inbound_region.offset_in_bar = 0;
2154 	inbound_region.addr = STM_FLASH_BASE_ADDR;
2155 	inbound_region.size = CFG_REGION_SIZE;
2156 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2157 	if (rc)
2158 		return rc;
2159 
2160 	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2161 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2162 	inbound_region.bar = SRAM_CFG_BAR_ID;
2163 	inbound_region.offset_in_bar = CFG_REGION_SIZE;
2164 	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2165 	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2166 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2167 	if (rc)
2168 		return rc;
2169 
2170 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2171 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2172 	inbound_region.bar = DRAM_BAR_ID;
2173 	inbound_region.addr = DRAM_PHYS_BASE;
2174 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2175 	if (rc)
2176 		return rc;
2177 
2178 	/* Outbound Region 0 - Point to Host */
2179 	outbound_region.addr = HOST_PHYS_BASE_0;
2180 	outbound_region.size = HOST_PHYS_SIZE_0;
2181 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2182 
2183 	return rc;
2184 }
2185 
2186 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2187 {
2188 	return RREG32(mmHW_STATE);
2189 }
2190 
2191 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2192 {
2193 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2194 
2195 	/*
2196 	 * check for error condition in which number of binning candidates
2197 	 * is higher than the maximum supported by the driver
2198 	 */
2199 	if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2200 		dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2201 					MAX_CLUSTER_BINNING_FAULTY_TPCS,
2202 					hdev->tpc_binning);
2203 		return -EINVAL;
2204 	}
2205 
2206 	prop->tpc_binning_mask = hdev->tpc_binning;
2207 	prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2208 
2209 	return 0;
2210 }
2211 
2212 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2213 {
2214 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2215 	struct hw_queue_properties *q_props = prop->hw_queues_props;
2216 	u64 tpc_binning_mask;
2217 	u8 subst_idx = 0;
2218 	int i, rc;
2219 
2220 	rc = gaudi2_tpc_binning_init_prop(hdev);
2221 	if (rc)
2222 		return rc;
2223 
2224 	tpc_binning_mask = prop->tpc_binning_mask;
2225 
2226 	for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2227 		u8 subst_seq, binned, qid_base;
2228 
2229 		if (tpc_binning_mask == 0)
2230 			break;
2231 
2232 		if (subst_idx == 0) {
2233 			subst_seq = TPC_ID_DCORE0_TPC6;
2234 			qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2235 		} else {
2236 			subst_seq = TPC_ID_DCORE3_TPC5;
2237 			qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2238 		}
2239 
2240 
2241 		/* clear bit from mask */
2242 		binned = __ffs(tpc_binning_mask);
2243 		/*
2244 		 * Coverity complains about possible out-of-bound access in
2245 		 * clear_bit
2246 		 */
2247 		if (binned >= TPC_ID_SIZE) {
2248 			dev_err(hdev->dev,
2249 				"Invalid binned TPC (binning mask: %llx)\n",
2250 				tpc_binning_mask);
2251 			return -EINVAL;
2252 		}
2253 		clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2254 
2255 		/* also clear replacing TPC bit from enabled mask */
2256 		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2257 
2258 		/* bin substite TPC's Qs */
2259 		q_props[qid_base].binned = 1;
2260 		q_props[qid_base + 1].binned = 1;
2261 		q_props[qid_base + 2].binned = 1;
2262 		q_props[qid_base + 3].binned = 1;
2263 
2264 		subst_idx++;
2265 	}
2266 
2267 	return 0;
2268 }
2269 
2270 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2271 {
2272 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2273 	u8 num_faulty;
2274 
2275 	num_faulty = hweight32(hdev->decoder_binning);
2276 
2277 	/*
2278 	 * check for error condition in which number of binning candidates
2279 	 * is higher than the maximum supported by the driver
2280 	 */
2281 	if (num_faulty > MAX_FAULTY_DECODERS) {
2282 		dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2283 						hdev->decoder_binning);
2284 		return -EINVAL;
2285 	}
2286 
2287 	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2288 
2289 	if (prop->decoder_binning_mask)
2290 		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2291 	else
2292 		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2293 
2294 	return 0;
2295 }
2296 
2297 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2298 {
2299 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2300 
2301 	/* check if we should override default binning */
2302 	if (!hdev->dram_binning) {
2303 		prop->dram_binning_mask = 0;
2304 		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2305 		return;
2306 	}
2307 
2308 	/* set DRAM binning constraints */
2309 	prop->faulty_dram_cluster_map |= hdev->dram_binning;
2310 	prop->dram_binning_mask = hdev->dram_binning;
2311 	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2312 }
2313 
2314 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2315 {
2316 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2317 	struct hw_queue_properties *q_props;
2318 	u8 seq, num_faulty;
2319 
2320 	num_faulty = hweight32(hdev->edma_binning);
2321 
2322 	/*
2323 	 * check for error condition in which number of binning candidates
2324 	 * is higher than the maximum supported by the driver
2325 	 */
2326 	if (num_faulty > MAX_FAULTY_EDMAS) {
2327 		dev_err(hdev->dev,
2328 			"EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2329 			hdev->edma_binning);
2330 		return -EINVAL;
2331 	}
2332 
2333 	if (!hdev->edma_binning) {
2334 		prop->edma_binning_mask = 0;
2335 		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2336 		return 0;
2337 	}
2338 
2339 	seq = __ffs((unsigned long)hdev->edma_binning);
2340 
2341 	/* set binning constraints */
2342 	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2343 	prop->edma_binning_mask = hdev->edma_binning;
2344 	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2345 
2346 	/* bin substitute EDMA's queue */
2347 	q_props = prop->hw_queues_props;
2348 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2349 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2350 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2351 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2352 
2353 	return 0;
2354 }
2355 
2356 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2357 {
2358 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2359 	u8 num_faulty, seq;
2360 
2361 	/* check if we should override default binning */
2362 	if (!xbar_edge_iso_mask) {
2363 		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2364 		return 0;
2365 	}
2366 
2367 	/*
2368 	 * note that it can be set to value other than 0 only after cpucp packet (i.e.
2369 	 * only the FW can set a redundancy value). for user it'll always be 0.
2370 	 */
2371 	num_faulty = hweight32(xbar_edge_iso_mask);
2372 
2373 	/*
2374 	 * check for error condition in which number of binning candidates
2375 	 * is higher than the maximum supported by the driver
2376 	 */
2377 	if (num_faulty > MAX_FAULTY_XBARS) {
2378 		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2379 									MAX_FAULTY_XBARS);
2380 		return -EINVAL;
2381 	}
2382 
2383 	seq = __ffs((unsigned long)xbar_edge_iso_mask);
2384 
2385 	/* set binning constraints */
2386 	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2387 	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2388 
2389 	return 0;
2390 }
2391 
2392 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2393 {
2394 	int rc;
2395 
2396 	/*
2397 	 * mark all clusters as good, each component will "fail" cluster
2398 	 * based on eFuse/user values.
2399 	 * If more than single cluster is faulty- the chip is unusable
2400 	 */
2401 	hdev->asic_prop.faulty_dram_cluster_map = 0;
2402 
2403 	gaudi2_set_dram_binning_masks(hdev);
2404 
2405 	rc = gaudi2_set_edma_binning_masks(hdev);
2406 	if (rc)
2407 		return rc;
2408 
2409 	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2410 	if (rc)
2411 		return rc;
2412 
2413 
2414 	/* always initially set to full mask */
2415 	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2416 
2417 	return 0;
2418 }
2419 
2420 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2421 {
2422 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2423 	int rc;
2424 
2425 	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2426 	if (rc)
2427 		return rc;
2428 
2429 	/* if we have DRAM binning reported by FW we should perform cluster config  */
2430 	if (prop->faulty_dram_cluster_map) {
2431 		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2432 
2433 		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2434 	}
2435 
2436 	return 0;
2437 }
2438 
2439 static int gaudi2_set_binning_masks(struct hl_device *hdev)
2440 {
2441 	int rc;
2442 
2443 	rc = gaudi2_set_cluster_binning_masks(hdev);
2444 	if (rc)
2445 		return rc;
2446 
2447 	rc = gaudi2_set_tpc_binning_masks(hdev);
2448 	if (rc)
2449 		return rc;
2450 
2451 	rc = gaudi2_set_dec_binning_masks(hdev);
2452 	if (rc)
2453 		return rc;
2454 
2455 	return 0;
2456 }
2457 
2458 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2459 {
2460 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2461 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2462 	long max_power;
2463 	u64 dram_size;
2464 	int rc;
2465 
2466 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2467 		return 0;
2468 
2469 	/* No point of asking this information again when not doing hard reset, as the device
2470 	 * CPU hasn't been reset
2471 	 */
2472 	if (hdev->reset_info.in_compute_reset)
2473 		return 0;
2474 
2475 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2476 										mmCPU_BOOT_ERR1);
2477 	if (rc)
2478 		return rc;
2479 
2480 	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2481 	if (dram_size) {
2482 		/* we can have wither 5 or 6 HBMs. other values are invalid */
2483 
2484 		if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2485 					(dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2486 			dev_err(hdev->dev,
2487 				"F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2488 				dram_size, prop->dram_size);
2489 			dram_size = prop->dram_size;
2490 		}
2491 
2492 		prop->dram_size = dram_size;
2493 		prop->dram_end_address = prop->dram_base_address + dram_size;
2494 	}
2495 
2496 	if (!strlen(prop->cpucp_info.card_name))
2497 		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2498 
2499 	/* Overwrite binning masks with the actual binning values from F/W */
2500 	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2501 	hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2502 	hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2503 	hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2504 
2505 	/*
2506 	 * at this point the DRAM parameters need to be updated according to data obtained
2507 	 * from the FW
2508 	 */
2509 	rc = hdev->asic_funcs->set_dram_properties(hdev);
2510 	if (rc)
2511 		return rc;
2512 
2513 	rc = hdev->asic_funcs->set_binning_masks(hdev);
2514 	if (rc)
2515 		return rc;
2516 
2517 	max_power = hl_fw_get_max_power(hdev);
2518 	if (max_power < 0)
2519 		return max_power;
2520 
2521 	prop->max_power_default = (u64) max_power;
2522 
2523 	return 0;
2524 }
2525 
2526 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2527 {
2528 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2529 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2530 	int rc;
2531 
2532 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2533 		return 0;
2534 
2535 	rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2536 	if (rc)
2537 		return rc;
2538 
2539 	hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2540 
2541 	return 0;
2542 }
2543 
2544 static int gaudi2_early_init(struct hl_device *hdev)
2545 {
2546 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2547 	struct pci_dev *pdev = hdev->pdev;
2548 	resource_size_t pci_bar_size;
2549 	int rc;
2550 
2551 	rc = gaudi2_set_fixed_properties(hdev);
2552 	if (rc)
2553 		return rc;
2554 
2555 	/* Check BAR sizes */
2556 	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2557 
2558 	if (pci_bar_size != CFG_BAR_SIZE) {
2559 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2560 			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2561 		rc = -ENODEV;
2562 		goto free_queue_props;
2563 	}
2564 
2565 	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2566 	if (pci_bar_size != MSIX_BAR_SIZE) {
2567 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2568 			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2569 		rc = -ENODEV;
2570 		goto free_queue_props;
2571 	}
2572 
2573 	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2574 	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2575 
2576 	/*
2577 	 * Only in pldm driver config iATU
2578 	 */
2579 	if (hdev->pldm)
2580 		hdev->asic_prop.iatu_done_by_fw = false;
2581 	else
2582 		hdev->asic_prop.iatu_done_by_fw = true;
2583 
2584 	rc = hl_pci_init(hdev);
2585 	if (rc)
2586 		goto free_queue_props;
2587 
2588 	/* Before continuing in the initialization, we need to read the preboot
2589 	 * version to determine whether we run with a security-enabled firmware
2590 	 */
2591 	rc = hl_fw_read_preboot_status(hdev);
2592 	if (rc) {
2593 		if (hdev->reset_on_preboot_fail)
2594 			hdev->asic_funcs->hw_fini(hdev, true, false);
2595 		goto pci_fini;
2596 	}
2597 
2598 	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2599 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2600 		hdev->asic_funcs->hw_fini(hdev, true, false);
2601 	}
2602 
2603 	return 0;
2604 
2605 pci_fini:
2606 	hl_pci_fini(hdev);
2607 free_queue_props:
2608 	kfree(hdev->asic_prop.hw_queues_props);
2609 	return rc;
2610 }
2611 
2612 static int gaudi2_early_fini(struct hl_device *hdev)
2613 {
2614 	kfree(hdev->asic_prop.hw_queues_props);
2615 	hl_pci_fini(hdev);
2616 
2617 	return 0;
2618 }
2619 
2620 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
2621 {
2622 	switch (arc_id) {
2623 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
2624 		return true;
2625 	default:
2626 		return false;
2627 	}
2628 }
2629 
2630 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
2631 {
2632 	switch (arc_id) {
2633 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
2634 		return true;
2635 	default:
2636 		return false;
2637 	}
2638 }
2639 
2640 static void gaudi2_init_arcs(struct hl_device *hdev)
2641 {
2642 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2643 	u64 arc_id;
2644 	u32 i;
2645 
2646 	for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
2647 		if (gaudi2_is_arc_enabled(hdev, i))
2648 			continue;
2649 
2650 		gaudi2_set_arc_id_cap(hdev, i);
2651 	}
2652 
2653 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
2654 		if (!gaudi2_is_queue_enabled(hdev, i))
2655 			continue;
2656 
2657 		arc_id = gaudi2_queue_id_to_arc_id[i];
2658 		if (gaudi2_is_arc_enabled(hdev, arc_id))
2659 			continue;
2660 
2661 		if (gaudi2_is_arc_nic_owned(arc_id) &&
2662 				!(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
2663 			continue;
2664 
2665 		if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
2666 							BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
2667 			continue;
2668 
2669 		gaudi2_set_arc_id_cap(hdev, arc_id);
2670 	}
2671 }
2672 
2673 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
2674 {
2675 	u32 reg_base, reg_val;
2676 	int rc;
2677 
2678 	switch (cpu_id) {
2679 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
2680 		/* Each ARC scheduler has 2 consecutive DCCM blocks */
2681 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2682 						ARC_DCCM_BLOCK_SIZE * 2, true);
2683 		if (rc)
2684 			return rc;
2685 		break;
2686 	case CPU_ID_SCHED_ARC4:
2687 	case CPU_ID_SCHED_ARC5:
2688 	case CPU_ID_MME_QMAN_ARC0:
2689 	case CPU_ID_MME_QMAN_ARC1:
2690 		reg_base = gaudi2_arc_blocks_bases[cpu_id];
2691 
2692 		/* Scrub lower DCCM block */
2693 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2694 						ARC_DCCM_BLOCK_SIZE, true);
2695 		if (rc)
2696 			return rc;
2697 
2698 		/* Switch to upper DCCM block */
2699 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
2700 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2701 
2702 		/* Scrub upper DCCM block */
2703 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2704 						ARC_DCCM_BLOCK_SIZE, true);
2705 		if (rc)
2706 			return rc;
2707 
2708 		/* Switch to lower DCCM block */
2709 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
2710 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
2711 		break;
2712 	default:
2713 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
2714 						ARC_DCCM_BLOCK_SIZE, true);
2715 		if (rc)
2716 			return rc;
2717 	}
2718 
2719 	return 0;
2720 }
2721 
2722 static void gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
2723 {
2724 	u16 arc_id;
2725 
2726 	for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
2727 		if (!gaudi2_is_arc_enabled(hdev, arc_id))
2728 			continue;
2729 
2730 		gaudi2_scrub_arc_dccm(hdev, arc_id);
2731 	}
2732 }
2733 
2734 static int gaudi2_late_init(struct hl_device *hdev)
2735 {
2736 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2737 	int rc;
2738 
2739 	hdev->asic_prop.supports_advanced_cpucp_rc = true;
2740 
2741 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
2742 					gaudi2->virt_msix_db_dma_addr);
2743 	if (rc) {
2744 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
2745 		return rc;
2746 	}
2747 
2748 	rc = gaudi2_fetch_psoc_frequency(hdev);
2749 	if (rc) {
2750 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
2751 		goto disable_pci_access;
2752 	}
2753 
2754 	gaudi2_init_arcs(hdev);
2755 	gaudi2_scrub_arcs_dccm(hdev);
2756 	gaudi2_init_security(hdev);
2757 
2758 	return 0;
2759 
2760 disable_pci_access:
2761 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
2762 
2763 	return rc;
2764 }
2765 
2766 static void gaudi2_late_fini(struct hl_device *hdev)
2767 {
2768 	hl_hwmon_release_resources(hdev);
2769 }
2770 
2771 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
2772 {
2773 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2774 
2775 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2776 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2777 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2778 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2779 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2780 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2781 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2782 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2783 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
2784 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
2785 }
2786 
2787 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
2788 {
2789 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2790 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
2791 	u32 block_size, umr_start_idx, num_umr_blocks;
2792 	int i;
2793 
2794 	for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
2795 		if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
2796 			block_size = ARC_DCCM_BLOCK_SIZE * 2;
2797 		else
2798 			block_size = ARC_DCCM_BLOCK_SIZE;
2799 
2800 		blocks[i].address = gaudi2_arc_dccm_bases[i];
2801 		blocks[i].size = block_size;
2802 	}
2803 
2804 	blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
2805 	blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
2806 
2807 	blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
2808 	blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
2809 
2810 	blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
2811 	blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
2812 
2813 	blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
2814 	blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
2815 
2816 	blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
2817 	blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
2818 
2819 	blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
2820 	blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
2821 
2822 	blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
2823 	blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
2824 
2825 	blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
2826 	blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
2827 
2828 	umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
2829 	num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
2830 	for (i = 0 ; i < num_umr_blocks ; i++) {
2831 		u8 nic_id, umr_block_id;
2832 
2833 		nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
2834 		umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
2835 
2836 		blocks[umr_start_idx + i].address =
2837 			mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
2838 			(nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
2839 			(nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
2840 			umr_block_id * NIC_UMR_OFFSET;
2841 		blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
2842 	}
2843 
2844 	/* Expose decoder HW configuration block to user */
2845 	gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
2846 
2847 	for (i = 1; i < NUM_OF_DCORES; ++i) {
2848 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
2849 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
2850 
2851 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
2852 						mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
2853 
2854 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
2855 						mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
2856 	}
2857 }
2858 
2859 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
2860 {
2861 	dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
2862 	void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
2863 	int i, j, rc = 0;
2864 
2865 	/* The device ARC works with 32-bits addresses, and because there is a single HW register
2866 	 * that holds the extension bits (49..28), these bits must be identical in all the allocated
2867 	 * range.
2868 	 */
2869 
2870 	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
2871 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
2872 							&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
2873 		if (!virt_addr_arr[i]) {
2874 			rc = -ENOMEM;
2875 			goto free_dma_mem_arr;
2876 		}
2877 
2878 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
2879 		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
2880 			break;
2881 	}
2882 
2883 	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
2884 		dev_err(hdev->dev,
2885 			"MSB of ARC accessible DMA memory are not identical in all range\n");
2886 		rc = -EFAULT;
2887 		goto free_dma_mem_arr;
2888 	}
2889 
2890 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
2891 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
2892 
2893 free_dma_mem_arr:
2894 	for (j = 0 ; j < i ; j++)
2895 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
2896 						dma_addr_arr[j]);
2897 
2898 	return rc;
2899 }
2900 
2901 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
2902 {
2903 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2904 	struct pci_mem_region *region;
2905 
2906 	/* CFG */
2907 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
2908 	region->region_base = CFG_BASE;
2909 	region->region_size = CFG_SIZE;
2910 	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
2911 	region->bar_size = CFG_BAR_SIZE;
2912 	region->bar_id = SRAM_CFG_BAR_ID;
2913 	region->used = 1;
2914 
2915 	/* SRAM */
2916 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
2917 	region->region_base = SRAM_BASE_ADDR;
2918 	region->region_size = SRAM_SIZE;
2919 	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
2920 	region->bar_size = CFG_BAR_SIZE;
2921 	region->bar_id = SRAM_CFG_BAR_ID;
2922 	region->used = 1;
2923 
2924 	/* DRAM */
2925 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
2926 	region->region_base = DRAM_PHYS_BASE;
2927 	region->region_size = hdev->asic_prop.dram_size;
2928 	region->offset_in_bar = 0;
2929 	region->bar_size = prop->dram_pci_bar_size;
2930 	region->bar_id = DRAM_BAR_ID;
2931 	region->used = 1;
2932 }
2933 
2934 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
2935 {
2936 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2937 	int i, j, k;
2938 
2939 	/* Initialize common user CQ interrupt */
2940 	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
2941 				HL_COMMON_USER_CQ_INTERRUPT_ID, false);
2942 
2943 	/* Initialize common decoder interrupt */
2944 	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
2945 				HL_COMMON_DEC_INTERRUPT_ID, true);
2946 
2947 	/* User interrupts structure holds both decoder and user interrupts from various engines.
2948 	 * We first initialize the decoder interrupts and then we add the user interrupts.
2949 	 * The only limitation is that the last decoder interrupt id must be smaller
2950 	 * then GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
2951 	 */
2952 
2953 	/* Initialize decoder interrupts, expose only normal interrupts,
2954 	 * error interrupts to be handled by driver
2955 	 */
2956 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
2957 										i += 2, j++)
2958 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, true);
2959 
2960 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
2961 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, false);
2962 }
2963 
2964 static inline int gaudi2_get_non_zero_random_int(void)
2965 {
2966 	int rand = get_random_u32();
2967 
2968 	return rand ? rand : 1;
2969 }
2970 
2971 static int gaudi2_sw_init(struct hl_device *hdev)
2972 {
2973 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2974 	struct gaudi2_device *gaudi2;
2975 	int i, rc;
2976 
2977 	/* Allocate device structure */
2978 	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
2979 	if (!gaudi2)
2980 		return -ENOMEM;
2981 
2982 	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
2983 		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
2984 			continue;
2985 
2986 		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
2987 			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
2988 				GAUDI2_EVENT_SIZE);
2989 			rc = -EINVAL;
2990 			goto free_gaudi2_device;
2991 		}
2992 
2993 		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
2994 	}
2995 
2996 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
2997 		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
2998 
2999 	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3000 
3001 	hdev->asic_specific = gaudi2;
3002 
3003 	/* Create DMA pool for small allocations.
3004 	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3005 	 * PI/CI registers allocated from this pool have this restriction
3006 	 */
3007 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3008 					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3009 	if (!hdev->dma_pool) {
3010 		dev_err(hdev->dev, "failed to create DMA pool\n");
3011 		rc = -ENOMEM;
3012 		goto free_gaudi2_device;
3013 	}
3014 
3015 	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3016 	if (rc)
3017 		goto free_dma_pool;
3018 
3019 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3020 	if (!hdev->cpu_accessible_dma_pool) {
3021 		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3022 		rc = -ENOMEM;
3023 		goto free_cpu_dma_mem;
3024 	}
3025 
3026 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3027 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3028 	if (rc) {
3029 		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3030 		rc = -EFAULT;
3031 		goto free_cpu_accessible_dma_pool;
3032 	}
3033 
3034 	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3035 								&gaudi2->virt_msix_db_dma_addr);
3036 	if (!gaudi2->virt_msix_db_cpu_addr) {
3037 		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3038 		rc = -ENOMEM;
3039 		goto free_cpu_accessible_dma_pool;
3040 	}
3041 
3042 	spin_lock_init(&gaudi2->hw_queues_lock);
3043 
3044 	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3045 							&gaudi2->scratchpad_bus_address,
3046 							GFP_KERNEL | __GFP_ZERO);
3047 	if (!gaudi2->scratchpad_kernel_address) {
3048 		rc = -ENOMEM;
3049 		goto free_virt_msix_db_mem;
3050 	}
3051 
3052 	gaudi2_user_mapped_blocks_init(hdev);
3053 
3054 	/* Initialize user interrupts */
3055 	gaudi2_user_interrupt_setup(hdev);
3056 
3057 	hdev->supports_coresight = true;
3058 	hdev->supports_sync_stream = true;
3059 	hdev->supports_cb_mapping = true;
3060 	hdev->supports_wait_for_multi_cs = false;
3061 
3062 	prop->supports_compute_reset = true;
3063 
3064 	hdev->asic_funcs->set_pci_memory_regions(hdev);
3065 
3066 	return 0;
3067 
3068 free_virt_msix_db_mem:
3069 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3070 free_cpu_accessible_dma_pool:
3071 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3072 free_cpu_dma_mem:
3073 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3074 					hdev->cpu_accessible_dma_address);
3075 free_dma_pool:
3076 	dma_pool_destroy(hdev->dma_pool);
3077 free_gaudi2_device:
3078 	kfree(gaudi2);
3079 	return rc;
3080 }
3081 
3082 static int gaudi2_sw_fini(struct hl_device *hdev)
3083 {
3084 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3085 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3086 
3087 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3088 
3089 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3090 
3091 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3092 						hdev->cpu_accessible_dma_address);
3093 
3094 	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3095 					gaudi2->scratchpad_bus_address);
3096 
3097 	dma_pool_destroy(hdev->dma_pool);
3098 
3099 	kfree(gaudi2);
3100 
3101 	return 0;
3102 }
3103 
3104 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3105 {
3106 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3107 						QM_GLBL_CFG1_CQF_STOP |
3108 						QM_GLBL_CFG1_CP_STOP);
3109 
3110 	/* stop also the ARC */
3111 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3112 }
3113 
3114 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3115 {
3116 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3117 						QM_GLBL_CFG1_CQF_FLUSH |
3118 						QM_GLBL_CFG1_CP_FLUSH);
3119 }
3120 
3121 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3122 {
3123 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3124 }
3125 
3126 /**
3127  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3128  *
3129  * @hdev: pointer to the habanalabs device structure
3130  * @queue_id: queue to clear fence counters to
3131  * @skip_fence: if true set maximum fence value to all fence counters to avoid
3132  *              getting stuck on any fence value. otherwise set all fence
3133  *              counters to 0 (standard clear of fence counters)
3134  */
3135 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3136 						bool skip_fence)
3137 {
3138 	u32 size, reg_base;
3139 	u32 addr, val;
3140 
3141 	reg_base = gaudi2_qm_blocks_bases[queue_id];
3142 
3143 	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3144 	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3145 
3146 	/*
3147 	 * in case we want to make sure that QM that is stuck on a fence will
3148 	 * be released we should set the fence counter to a higher value that
3149 	 * the value the QM waiting for. to comply with any fence counter of
3150 	 * any value we set maximum fence value to all counters
3151 	 */
3152 	val = skip_fence ? U32_MAX : 0;
3153 	gaudi2_memset_device_lbw(hdev, addr, size, val);
3154 }
3155 
3156 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3157 {
3158 	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3159 
3160 	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3161 	gaudi2_flush_qman_common(hdev, reg_base);
3162 	gaudi2_flush_qman_arc_common(hdev, reg_base);
3163 }
3164 
3165 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3166 {
3167 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3168 	int dcore, inst;
3169 
3170 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3171 		goto stop_edma_qmans;
3172 
3173 	/* Stop CPs of PDMA QMANs */
3174 	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3175 	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3176 
3177 stop_edma_qmans:
3178 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3179 		return;
3180 
3181 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3182 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3183 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3184 			u32 qm_base;
3185 
3186 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3187 				continue;
3188 
3189 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3190 					inst * DCORE_EDMA_OFFSET;
3191 
3192 			/* Stop CPs of EDMA QMANs */
3193 			gaudi2_stop_qman_common(hdev, qm_base);
3194 		}
3195 	}
3196 }
3197 
3198 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3199 {
3200 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3201 	u32 offset, i;
3202 
3203 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3204 
3205 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3206 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3207 			continue;
3208 
3209 		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3210 	}
3211 }
3212 
3213 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3214 {
3215 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3216 	u32 reg_base;
3217 	int i;
3218 
3219 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3220 		return;
3221 
3222 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3223 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3224 			continue;
3225 
3226 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3227 		gaudi2_stop_qman_common(hdev, reg_base);
3228 	}
3229 }
3230 
3231 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3232 {
3233 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3234 	u32 reg_base;
3235 	int i;
3236 
3237 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3238 		return;
3239 
3240 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3241 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3242 			continue;
3243 
3244 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3245 		gaudi2_stop_qman_common(hdev, reg_base);
3246 	}
3247 }
3248 
3249 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3250 {
3251 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3252 	u32 reg_base, queue_id;
3253 	int i;
3254 
3255 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3256 		return;
3257 
3258 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3259 
3260 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3261 		if (!(hdev->nic_ports_mask & BIT(i)))
3262 			continue;
3263 
3264 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3265 		gaudi2_stop_qman_common(hdev, reg_base);
3266 	}
3267 }
3268 
3269 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3270 {
3271 	u32 reg_val;
3272 
3273 	reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3274 	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3275 }
3276 
3277 static void gaudi2_dma_stall(struct hl_device *hdev)
3278 {
3279 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3280 	int dcore, inst;
3281 
3282 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3283 		goto stall_edma;
3284 
3285 	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3286 	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3287 
3288 stall_edma:
3289 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3290 		return;
3291 
3292 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3293 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3294 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3295 			u32 core_base;
3296 
3297 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3298 				continue;
3299 
3300 			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3301 					inst * DCORE_EDMA_OFFSET;
3302 
3303 			/* Stall CPs of EDMA QMANs */
3304 			gaudi2_stall_dma_common(hdev, core_base);
3305 		}
3306 	}
3307 }
3308 
3309 static void gaudi2_mme_stall(struct hl_device *hdev)
3310 {
3311 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3312 	u32 offset, i;
3313 
3314 	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3315 
3316 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3317 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3318 			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3319 }
3320 
3321 static void gaudi2_tpc_stall(struct hl_device *hdev)
3322 {
3323 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3324 	u32 reg_base;
3325 	int i;
3326 
3327 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3328 		return;
3329 
3330 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3331 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3332 			continue;
3333 
3334 		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3335 		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3336 	}
3337 }
3338 
3339 static void gaudi2_rotator_stall(struct hl_device *hdev)
3340 {
3341 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3342 	u32 reg_val;
3343 	int i;
3344 
3345 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3346 		return;
3347 
3348 	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3349 			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3350 			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3351 
3352 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3353 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3354 			continue;
3355 
3356 		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3357 	}
3358 }
3359 
3360 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3361 {
3362 	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3363 }
3364 
3365 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3366 {
3367 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3368 	int dcore, inst;
3369 
3370 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3371 		goto stop_edma_qmans;
3372 
3373 	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3374 	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3375 
3376 stop_edma_qmans:
3377 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3378 		return;
3379 
3380 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3381 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3382 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3383 			u32 qm_base;
3384 
3385 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3386 				continue;
3387 
3388 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3389 					inst * DCORE_EDMA_OFFSET;
3390 
3391 			/* Disable CPs of EDMA QMANs */
3392 			gaudi2_disable_qman_common(hdev, qm_base);
3393 		}
3394 	}
3395 }
3396 
3397 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3398 {
3399 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3400 	u32 offset, i;
3401 
3402 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3403 
3404 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3405 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3406 			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3407 }
3408 
3409 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3410 {
3411 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3412 	u32 reg_base;
3413 	int i;
3414 
3415 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3416 		return;
3417 
3418 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3419 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3420 			continue;
3421 
3422 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3423 		gaudi2_disable_qman_common(hdev, reg_base);
3424 	}
3425 }
3426 
3427 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
3428 {
3429 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3430 	u32 reg_base;
3431 	int i;
3432 
3433 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3434 		return;
3435 
3436 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3437 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3438 			continue;
3439 
3440 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3441 		gaudi2_disable_qman_common(hdev, reg_base);
3442 	}
3443 }
3444 
3445 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
3446 {
3447 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3448 	u32 reg_base, queue_id;
3449 	int i;
3450 
3451 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3452 		return;
3453 
3454 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3455 
3456 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3457 		if (!(hdev->nic_ports_mask & BIT(i)))
3458 			continue;
3459 
3460 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3461 		gaudi2_disable_qman_common(hdev, reg_base);
3462 	}
3463 }
3464 
3465 static void gaudi2_enable_timestamp(struct hl_device *hdev)
3466 {
3467 	/* Disable the timestamp counter */
3468 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3469 
3470 	/* Zero the lower/upper parts of the 64-bit counter */
3471 	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
3472 	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
3473 
3474 	/* Enable the counter */
3475 	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
3476 }
3477 
3478 static void gaudi2_disable_timestamp(struct hl_device *hdev)
3479 {
3480 	/* Disable the timestamp counter */
3481 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3482 }
3483 
3484 static const char *gaudi2_irq_name(u16 irq_number)
3485 {
3486 	switch (irq_number) {
3487 	case GAUDI2_IRQ_NUM_EVENT_QUEUE:
3488 		return "gaudi2 cpu eq";
3489 	case GAUDI2_IRQ_NUM_COMPLETION:
3490 		return "gaudi2 completion";
3491 	case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
3492 		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
3493 	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
3494 		return "gaudi2 user completion";
3495 	default:
3496 		return "invalid";
3497 	}
3498 }
3499 
3500 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
3501 {
3502 	int i, irq, relative_idx;
3503 	struct hl_dec *dec;
3504 
3505 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
3506 		irq = pci_irq_vector(hdev->pdev, i);
3507 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3508 
3509 		dec = hdev->dec + relative_idx / 2;
3510 
3511 		/* We pass different structures depending on the irq handler. For the abnormal
3512 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3513 		 * user_interrupt entry
3514 		 */
3515 		free_irq(irq, ((relative_idx % 2) ?
3516 				(void *) dec :
3517 				(void *) &hdev->user_interrupt[dec->core_id]));
3518 	}
3519 }
3520 
3521 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
3522 {
3523 	int rc, i, irq_init_cnt, irq, relative_idx;
3524 	irq_handler_t irq_handler;
3525 	struct hl_dec *dec;
3526 
3527 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
3528 			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
3529 			i++, irq_init_cnt++) {
3530 
3531 		irq = pci_irq_vector(hdev->pdev, i);
3532 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
3533 
3534 		irq_handler = (relative_idx % 2) ?
3535 				hl_irq_handler_dec_abnrm :
3536 				hl_irq_handler_user_interrupt;
3537 
3538 		dec = hdev->dec + relative_idx / 2;
3539 
3540 		/* We pass different structures depending on the irq handler. For the abnormal
3541 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
3542 		 * user_interrupt entry
3543 		 */
3544 		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i),
3545 				((relative_idx % 2) ?
3546 				(void *) dec :
3547 				(void *) &hdev->user_interrupt[dec->core_id]));
3548 		if (rc) {
3549 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3550 			goto free_dec_irqs;
3551 		}
3552 	}
3553 
3554 	return 0;
3555 
3556 free_dec_irqs:
3557 	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
3558 	return rc;
3559 }
3560 
3561 static int gaudi2_enable_msix(struct hl_device *hdev)
3562 {
3563 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3564 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3565 	int rc, irq, i, j, user_irq_init_cnt;
3566 	irq_handler_t irq_handler;
3567 	struct hl_cq *cq;
3568 
3569 	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
3570 		return 0;
3571 
3572 	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
3573 					PCI_IRQ_MSIX);
3574 	if (rc < 0) {
3575 		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
3576 			GAUDI2_MSIX_ENTRIES, rc);
3577 		return rc;
3578 	}
3579 
3580 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3581 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3582 	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
3583 	if (rc) {
3584 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3585 		goto free_irq_vectors;
3586 	}
3587 
3588 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3589 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
3590 			&hdev->event_queue);
3591 	if (rc) {
3592 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3593 		goto free_completion_irq;
3594 	}
3595 
3596 	rc = gaudi2_dec_enable_msix(hdev);
3597 	if (rc) {
3598 		dev_err(hdev->dev, "Failed to enable decoder IRQ");
3599 		goto free_event_irq;
3600 	}
3601 
3602 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
3603 			user_irq_init_cnt < prop->user_interrupt_count;
3604 			i++, j++, user_irq_init_cnt++) {
3605 
3606 		irq = pci_irq_vector(hdev->pdev, i);
3607 		irq_handler = hl_irq_handler_user_interrupt;
3608 
3609 		rc = request_irq(irq, irq_handler, 0, gaudi2_irq_name(i), &hdev->user_interrupt[j]);
3610 		if (rc) {
3611 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
3612 			goto free_user_irq;
3613 		}
3614 	}
3615 
3616 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
3617 
3618 	return 0;
3619 
3620 free_user_irq:
3621 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
3622 			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
3623 
3624 		irq = pci_irq_vector(hdev->pdev, i);
3625 		free_irq(irq, &hdev->user_interrupt[j]);
3626 	}
3627 
3628 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3629 
3630 free_event_irq:
3631 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3632 	free_irq(irq, cq);
3633 
3634 free_completion_irq:
3635 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3636 	free_irq(irq, cq);
3637 
3638 free_irq_vectors:
3639 	pci_free_irq_vectors(hdev->pdev);
3640 
3641 	return rc;
3642 }
3643 
3644 static void gaudi2_sync_irqs(struct hl_device *hdev)
3645 {
3646 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3647 	int i, j;
3648 	int irq;
3649 
3650 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3651 		return;
3652 
3653 	/* Wait for all pending IRQs to be finished */
3654 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
3655 
3656 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
3657 		irq = pci_irq_vector(hdev->pdev, i);
3658 		synchronize_irq(irq);
3659 	}
3660 
3661 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
3662 										i++, j++) {
3663 		irq = pci_irq_vector(hdev->pdev, i);
3664 		synchronize_irq(irq);
3665 	}
3666 
3667 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
3668 }
3669 
3670 static void gaudi2_disable_msix(struct hl_device *hdev)
3671 {
3672 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3673 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3674 	struct hl_cq *cq;
3675 	int irq, i, j, k;
3676 
3677 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
3678 		return;
3679 
3680 	gaudi2_sync_irqs(hdev);
3681 
3682 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
3683 	free_irq(irq, &hdev->event_queue);
3684 
3685 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
3686 
3687 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
3688 			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
3689 
3690 		irq = pci_irq_vector(hdev->pdev, i);
3691 		free_irq(irq, &hdev->user_interrupt[j]);
3692 	}
3693 
3694 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
3695 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
3696 	free_irq(irq, cq);
3697 
3698 	pci_free_irq_vectors(hdev->pdev);
3699 
3700 	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
3701 }
3702 
3703 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
3704 {
3705 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3706 	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3707 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3708 	int rc;
3709 
3710 	if (hdev->pldm)
3711 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3712 	else
3713 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3714 
3715 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3716 		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
3717 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3718 			continue;
3719 
3720 		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
3721 
3722 		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
3723 
3724 		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3725 
3726 		/* Wait till all traffic from decoder stops
3727 		 * before apply core reset.
3728 		 */
3729 		rc = hl_poll_timeout(
3730 				hdev,
3731 				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3732 				graceful,
3733 				(graceful & graceful_pend_mask),
3734 				100,
3735 				timeout_usec);
3736 		if (rc)
3737 			dev_err(hdev->dev,
3738 				"Failed to stop traffic from DCORE%d Decoder %d\n",
3739 				dcore_id, dec_id);
3740 	}
3741 }
3742 
3743 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
3744 {
3745 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
3746 	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
3747 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
3748 	int rc;
3749 
3750 	if (hdev->pldm)
3751 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
3752 	else
3753 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
3754 
3755 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
3756 		dec_bit = PCIE_DEC_SHIFT + dec_id;
3757 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
3758 			continue;
3759 
3760 		offset = dec_id * PCIE_VDEC_OFFSET;
3761 
3762 		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
3763 
3764 		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
3765 
3766 		/* Wait till all traffic from decoder stops
3767 		 * before apply core reset.
3768 		 */
3769 		rc = hl_poll_timeout(
3770 				hdev,
3771 				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
3772 				graceful,
3773 				(graceful & graceful_pend_mask),
3774 				100,
3775 				timeout_usec);
3776 		if (rc)
3777 			dev_err(hdev->dev,
3778 				"Failed to stop traffic from PCIe Decoder %d\n",
3779 				dec_id);
3780 	}
3781 }
3782 
3783 static void gaudi2_stop_dec(struct hl_device *hdev)
3784 {
3785 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3786 	int dcore_id;
3787 
3788 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
3789 		return;
3790 
3791 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
3792 		gaudi2_stop_dcore_dec(hdev, dcore_id);
3793 
3794 	gaudi2_stop_pcie_dec(hdev);
3795 }
3796 
3797 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3798 {
3799 	u32 reg_base, reg_val;
3800 
3801 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
3802 	if (run_mode == HL_ENGINE_CORE_RUN)
3803 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
3804 	else
3805 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
3806 
3807 	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
3808 }
3809 
3810 static void gaudi2_halt_arcs(struct hl_device *hdev)
3811 {
3812 	u16 arc_id;
3813 
3814 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
3815 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3816 			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
3817 	}
3818 }
3819 
3820 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
3821 {
3822 	int rc;
3823 	u32 reg_base, val, ack_mask, timeout_usec = 100000;
3824 
3825 	if (hdev->pldm)
3826 		timeout_usec *= 100;
3827 
3828 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
3829 	if (run_mode == HL_ENGINE_CORE_RUN)
3830 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
3831 	else
3832 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
3833 
3834 	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
3835 				val, ((val & ack_mask) == ack_mask),
3836 				1000, timeout_usec);
3837 
3838 	if (!rc) {
3839 		/* Clear */
3840 		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
3841 		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
3842 	}
3843 
3844 	return rc;
3845 }
3846 
3847 static void gaudi2_reset_arcs(struct hl_device *hdev)
3848 {
3849 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3850 	u16 arc_id;
3851 
3852 	if (!gaudi2)
3853 		return;
3854 
3855 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
3856 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3857 			gaudi2_clr_arc_id_cap(hdev, arc_id);
3858 }
3859 
3860 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
3861 {
3862 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3863 	u32 queue_id;
3864 	int i;
3865 
3866 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3867 		return;
3868 
3869 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3870 
3871 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3872 		if (!(hdev->nic_ports_mask & BIT(i)))
3873 			continue;
3874 
3875 		gaudi2_qman_manual_flush_common(hdev, queue_id);
3876 	}
3877 }
3878 
3879 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
3880 					u32 num_cores, u32 core_command)
3881 {
3882 	int i, rc;
3883 
3884 
3885 	for (i = 0 ; i < num_cores ; i++) {
3886 		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
3887 			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
3888 	}
3889 
3890 	for (i = 0 ; i < num_cores ; i++) {
3891 		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
3892 			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
3893 
3894 			if (rc) {
3895 				dev_err(hdev->dev, "failed to %s arc: %d\n",
3896 					(core_command == HL_ENGINE_CORE_HALT) ?
3897 					"HALT" : "RUN", core_ids[i]);
3898 				return -1;
3899 			}
3900 		}
3901 	}
3902 
3903 	return 0;
3904 }
3905 
3906 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3907 {
3908 	u32 wait_timeout_ms;
3909 
3910 	if (hdev->pldm)
3911 		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
3912 	else
3913 		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
3914 
3915 	if (fw_reset)
3916 		goto skip_engines;
3917 
3918 	gaudi2_stop_dma_qmans(hdev);
3919 	gaudi2_stop_mme_qmans(hdev);
3920 	gaudi2_stop_tpc_qmans(hdev);
3921 	gaudi2_stop_rot_qmans(hdev);
3922 	gaudi2_stop_nic_qmans(hdev);
3923 	msleep(wait_timeout_ms);
3924 
3925 	gaudi2_halt_arcs(hdev);
3926 	gaudi2_dma_stall(hdev);
3927 	gaudi2_mme_stall(hdev);
3928 	gaudi2_tpc_stall(hdev);
3929 	gaudi2_rotator_stall(hdev);
3930 
3931 	msleep(wait_timeout_ms);
3932 
3933 	gaudi2_stop_dec(hdev);
3934 
3935 	/*
3936 	 * in case of soft reset do a manual flush for QMANs (currently called
3937 	 * only for NIC QMANs
3938 	 */
3939 	if (!hard_reset)
3940 		gaudi2_nic_qmans_manual_flush(hdev);
3941 
3942 	gaudi2_disable_dma_qmans(hdev);
3943 	gaudi2_disable_mme_qmans(hdev);
3944 	gaudi2_disable_tpc_qmans(hdev);
3945 	gaudi2_disable_rot_qmans(hdev);
3946 	gaudi2_disable_nic_qmans(hdev);
3947 	gaudi2_disable_timestamp(hdev);
3948 
3949 skip_engines:
3950 	if (hard_reset) {
3951 		gaudi2_disable_msix(hdev);
3952 		return;
3953 	}
3954 
3955 	gaudi2_sync_irqs(hdev);
3956 }
3957 
3958 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
3959 {
3960 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3961 
3962 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3963 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3964 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3965 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3966 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3967 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
3968 }
3969 
3970 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
3971 {
3972 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3973 	struct dynamic_fw_load_mgr *dynamic_loader;
3974 	struct cpu_dyn_regs *dyn_regs;
3975 
3976 	/* fill common fields */
3977 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3978 	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
3979 	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
3980 	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
3981 	fw_loader->skip_bmc = false;
3982 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
3983 	fw_loader->dram_bar_id = DRAM_BAR_ID;
3984 	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
3985 
3986 	/* here we update initial values for few specific dynamic regs (as
3987 	 * before reading the first descriptor from FW those value has to be
3988 	 * hard-coded). in later stages of the protocol those values will be
3989 	 * updated automatically by reading the FW descriptor so data there
3990 	 * will always be up-to-date
3991 	 */
3992 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3993 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3994 	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3995 	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3996 	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
3997 }
3998 
3999 static int gaudi2_init_cpu(struct hl_device *hdev)
4000 {
4001 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4002 	int rc;
4003 
4004 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4005 		return 0;
4006 
4007 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4008 		return 0;
4009 
4010 	rc = hl_fw_init_cpu(hdev);
4011 	if (rc)
4012 		return rc;
4013 
4014 	gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4015 
4016 	return 0;
4017 }
4018 
4019 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4020 {
4021 	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4022 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4023 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4024 	struct cpu_dyn_regs *dyn_regs;
4025 	struct hl_eq *eq;
4026 	u32 status;
4027 	int err;
4028 
4029 	if (!hdev->cpu_queues_enable)
4030 		return 0;
4031 
4032 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4033 		return 0;
4034 
4035 	eq = &hdev->event_queue;
4036 
4037 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4038 
4039 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4040 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4041 
4042 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4043 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4044 
4045 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4046 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4047 
4048 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4049 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4050 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4051 
4052 	/* Used for EQ CI */
4053 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4054 
4055 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4056 
4057 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4058 
4059 	/* Let the ARC know we are ready as it is now handling those queues  */
4060 
4061 	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4062 		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4063 
4064 	err = hl_poll_timeout(
4065 		hdev,
4066 		mmCPU_IF_QUEUE_INIT,
4067 		status,
4068 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4069 		1000,
4070 		cpu_timeout);
4071 
4072 	if (err) {
4073 		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4074 		return -EIO;
4075 	}
4076 
4077 	/* update FW application security bits */
4078 	if (prop->fw_cpu_boot_dev_sts0_valid)
4079 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4080 
4081 	if (prop->fw_cpu_boot_dev_sts1_valid)
4082 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4083 
4084 	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4085 	return 0;
4086 }
4087 
4088 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4089 				u32 queue_id_base)
4090 {
4091 	struct hl_hw_queue *q;
4092 	u32 pq_id, pq_offset;
4093 
4094 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4095 		q = &hdev->kernel_queues[queue_id_base + pq_id];
4096 		pq_offset = pq_id * 4;
4097 
4098 		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4099 				lower_32_bits(q->bus_address));
4100 		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4101 				upper_32_bits(q->bus_address));
4102 		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4103 		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4104 		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4105 	}
4106 }
4107 
4108 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4109 {
4110 	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4111 
4112 	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4113 	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4114 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4115 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4116 
4117 	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4118 		cp_offset = cp_id * 4;
4119 
4120 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4121 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset,	mtr_base_hi);
4122 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset,	so_base_lo);
4123 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset,	so_base_hi);
4124 	}
4125 
4126 	/* allow QMANs to accept work from ARC CQF */
4127 	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4128 }
4129 
4130 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4131 				u32 queue_id_base)
4132 {
4133 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4134 	u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4135 
4136 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4137 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4138 
4139 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4140 		pq_offset = pq_id * 4;
4141 
4142 		/* Configure QMAN HBW to scratchpad as it is not needed */
4143 		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4144 				lower_32_bits(gaudi2->scratchpad_bus_address));
4145 		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4146 				upper_32_bits(gaudi2->scratchpad_bus_address));
4147 		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4148 				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4149 
4150 		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4151 		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4152 		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4153 		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4154 	}
4155 
4156 	/* Enable QMAN H/W completion */
4157 	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4158 }
4159 
4160 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4161 {
4162 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4163 	u32 sp_reg_addr;
4164 
4165 	switch (queue_id_base) {
4166 	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4167 		fallthrough;
4168 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4169 		fallthrough;
4170 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4171 		fallthrough;
4172 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4173 		fallthrough;
4174 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4175 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4176 		break;
4177 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4178 		fallthrough;
4179 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4180 		fallthrough;
4181 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4182 		fallthrough;
4183 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4184 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4185 		break;
4186 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4187 		fallthrough;
4188 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4189 		fallthrough;
4190 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4191 		fallthrough;
4192 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4193 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4194 		break;
4195 	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4196 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4197 		break;
4198 	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4199 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4200 		break;
4201 	default:
4202 		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4203 		return 0;
4204 	}
4205 
4206 	return sp_reg_addr;
4207 }
4208 
4209 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4210 					u32 queue_id_base)
4211 {
4212 	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4213 	int map_table_entry;
4214 
4215 	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4216 
4217 	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4218 	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4219 	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4220 
4221 	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4222 	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4223 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4224 
4225 	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4226 
4227 	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4228 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4229 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4230 
4231 	/* Enable the QMAN channel.
4232 	 * PDMA QMAN configuration is different, as we do not allow user to
4233 	 * access some of the CPs.
4234 	 * PDMA0: CP2/3 are reserved for the ARC usage.
4235 	 * PDMA1: CP1/2/3 are reserved for the ARC usage.
4236 	 */
4237 	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4238 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4239 	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4240 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4241 	else
4242 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4243 }
4244 
4245 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4246 		u32 queue_id_base)
4247 {
4248 	u32 pq_id;
4249 
4250 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
4251 		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
4252 
4253 	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
4254 	gaudi2_init_qman_cp(hdev, reg_base);
4255 	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
4256 	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
4257 }
4258 
4259 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
4260 				u32 dma_core_id, bool is_secure)
4261 {
4262 	u32 prot, irq_handler_offset;
4263 	struct cpu_dyn_regs *dyn_regs;
4264 	int map_table_entry;
4265 
4266 	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
4267 	if (is_secure)
4268 		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
4269 
4270 	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
4271 
4272 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4273 	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
4274 
4275 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
4276 			lower_32_bits(CFG_BASE + irq_handler_offset));
4277 
4278 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
4279 			upper_32_bits(CFG_BASE + irq_handler_offset));
4280 
4281 	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
4282 	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
4283 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4284 
4285 	/* Enable the DMA channel */
4286 	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
4287 }
4288 
4289 static void gaudi2_init_kdma(struct hl_device *hdev)
4290 {
4291 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4292 	u32 reg_base;
4293 
4294 	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
4295 		return;
4296 
4297 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
4298 
4299 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
4300 
4301 	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
4302 }
4303 
4304 static void gaudi2_init_pdma(struct hl_device *hdev)
4305 {
4306 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4307 	u32 reg_base;
4308 
4309 	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
4310 		return;
4311 
4312 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
4313 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
4314 
4315 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
4316 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
4317 
4318 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
4319 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
4320 
4321 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
4322 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
4323 
4324 	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
4325 }
4326 
4327 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
4328 {
4329 	u32 reg_base, base_edma_core_id, base_edma_qman_id;
4330 
4331 	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
4332 	base_edma_qman_id = edma_stream_base[seq];
4333 
4334 	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
4335 	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
4336 
4337 	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
4338 	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
4339 }
4340 
4341 static void gaudi2_init_edma(struct hl_device *hdev)
4342 {
4343 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4344 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4345 	int dcore, inst;
4346 
4347 	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
4348 		return;
4349 
4350 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4351 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4352 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4353 
4354 			if (!(prop->edma_enabled_mask & BIT(seq)))
4355 				continue;
4356 
4357 			gaudi2_init_edma_instance(hdev, seq);
4358 
4359 			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
4360 		}
4361 	}
4362 }
4363 
4364 /*
4365  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
4366  * @hdev: pointer to habanalabs device structure.
4367  * @sob_id: sync object ID.
4368  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
4369  * @interrupt_id: interrupt ID.
4370  *
4371  * Some initiators cannot have HBW address in their completion address registers, and thus cannot
4372  * write directly to the HBW host memory of the virtual MSI-X doorbell.
4373  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
4374  *
4375  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
4376  * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
4377  * completion, by decrementing the sync object value and re-arming the monitor.
4378  */
4379 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
4380 							u32 first_mon_id, u32 interrupt_id)
4381 {
4382 	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
4383 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4384 	u64 addr;
4385 	u8 mask;
4386 
4387 	/* Reset the SOB value */
4388 	sob_offset = sob_id * sizeof(u32);
4389 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
4390 
4391 	/* Configure 3 monitors:
4392 	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
4393 	 * 2. Decrement SOB value by 1.
4394 	 * 3. Re-arm the master monitor.
4395 	 */
4396 
4397 	first_mon_offset = first_mon_id * sizeof(u32);
4398 
4399 	/* 2nd monitor: Decrement SOB value by 1 */
4400 	mon_offset = first_mon_offset + sizeof(u32);
4401 
4402 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
4403 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4404 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4405 
4406 	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
4407 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
4408 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
4409 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4410 
4411 	/* 3rd monitor: Re-arm the master monitor */
4412 	mon_offset = first_mon_offset + 2 * sizeof(u32);
4413 
4414 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
4415 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4416 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4417 
4418 	sob_group = sob_id / 8;
4419 	mask = ~BIT(sob_id & 0x7);
4420 	mode = 0; /* comparison mode is "greater than or equal to" */
4421 	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
4422 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
4423 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
4424 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
4425 
4426 	payload = arm;
4427 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4428 
4429 	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
4430 	mon_offset = first_mon_offset;
4431 
4432 	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
4433 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
4434 
4435 	addr = gaudi2->virt_msix_db_dma_addr;
4436 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
4437 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
4438 
4439 	payload = interrupt_id;
4440 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
4441 
4442 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
4443 }
4444 
4445 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
4446 {
4447 	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
4448 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4449 
4450 	/* Decoder normal/abnormal interrupts */
4451 	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
4452 		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
4453 			continue;
4454 
4455 		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4456 		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
4457 		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
4458 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4459 
4460 		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4461 		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
4462 		interrupt_id += 1;
4463 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
4464 	}
4465 }
4466 
4467 static void gaudi2_init_sm(struct hl_device *hdev)
4468 {
4469 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4470 	u64 cq_address;
4471 	u32 reg_val;
4472 	int i;
4473 
4474 	/* Enable HBW/LBW CQ for completion monitors */
4475 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4476 	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
4477 
4478 	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
4479 		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4480 
4481 	/* Enable only HBW CQ for KDMA completion monitor */
4482 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
4483 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
4484 
4485 	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
4486 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
4487 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
4488 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
4489 
4490 	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
4491 		cq_address =
4492 			hdev->completion_queue[i].bus_address;
4493 
4494 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
4495 							lower_32_bits(cq_address));
4496 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
4497 							upper_32_bits(cq_address));
4498 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
4499 							ilog2(HL_CQ_SIZE_IN_BYTES));
4500 	}
4501 
4502 	/* Configure kernel ASID and MMU BP*/
4503 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
4504 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
4505 
4506 	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
4507 	gaudi2_prepare_sm_for_virt_msix_db(hdev);
4508 }
4509 
4510 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
4511 {
4512 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4513 	u32 reg_val;
4514 	int i;
4515 
4516 	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
4517 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
4518 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
4519 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
4520 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
4521 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
4522 
4523 	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
4524 	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
4525 
4526 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
4527 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
4528 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
4529 	}
4530 }
4531 
4532 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
4533 							bool config_qman_only)
4534 {
4535 	u32 queue_id_base, reg_base;
4536 
4537 	switch (dcore_id) {
4538 	case 0:
4539 		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
4540 		break;
4541 	case 1:
4542 		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
4543 		break;
4544 	case 2:
4545 		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
4546 		break;
4547 	case 3:
4548 		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
4549 		break;
4550 	default:
4551 		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
4552 		return;
4553 	}
4554 
4555 	if (!config_qman_only) {
4556 		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
4557 		gaudi2_init_mme_acc(hdev, reg_base);
4558 	}
4559 
4560 	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
4561 	gaudi2_init_qman(hdev, reg_base, queue_id_base);
4562 }
4563 
4564 static void gaudi2_init_mme(struct hl_device *hdev)
4565 {
4566 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4567 	int i;
4568 
4569 	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
4570 		return;
4571 
4572 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
4573 		gaudi2_init_dcore_mme(hdev, i, false);
4574 
4575 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
4576 	}
4577 }
4578 
4579 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
4580 {
4581 	/* Mask arithmetic and QM interrupts in TPC */
4582 	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
4583 
4584 	/* Set 16 cache lines */
4585 	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
4586 			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
4587 }
4588 
4589 struct gaudi2_tpc_init_cfg_data {
4590 	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
4591 };
4592 
4593 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
4594 					u32 offset, struct iterate_module_ctx *ctx)
4595 {
4596 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4597 	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
4598 	u32 queue_id_base;
4599 	u8 seq;
4600 
4601 	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
4602 
4603 	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
4604 		/* gets last sequence number */
4605 		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
4606 	else
4607 		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
4608 
4609 	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
4610 	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
4611 
4612 	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
4613 }
4614 
4615 static void gaudi2_init_tpc(struct hl_device *hdev)
4616 {
4617 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4618 	struct gaudi2_tpc_init_cfg_data init_cfg_data;
4619 	struct iterate_module_ctx tpc_iter;
4620 
4621 	if (!hdev->asic_prop.tpc_enabled_mask)
4622 		return;
4623 
4624 	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
4625 		return;
4626 
4627 	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
4628 	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
4629 	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
4630 	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
4631 	tpc_iter.fn = &gaudi2_init_tpc_config;
4632 	tpc_iter.data = &init_cfg_data;
4633 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
4634 }
4635 
4636 static void gaudi2_init_rotator(struct hl_device *hdev)
4637 {
4638 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4639 	u32 i, reg_base, queue_id;
4640 
4641 	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
4642 
4643 	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4644 		reg_base = gaudi2_qm_blocks_bases[queue_id];
4645 		gaudi2_init_qman(hdev, reg_base, queue_id);
4646 
4647 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
4648 	}
4649 }
4650 
4651 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
4652 {
4653 	u32 sob_id;
4654 
4655 	/* VCMD normal interrupt */
4656 	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
4657 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
4658 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4659 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4660 
4661 	/* VCMD abnormal interrupt */
4662 	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
4663 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
4664 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
4665 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
4666 }
4667 
4668 static void gaudi2_init_dec(struct hl_device *hdev)
4669 {
4670 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4671 	u32 dcore_id, dec_id, dec_bit;
4672 	u64 base_addr;
4673 
4674 	if (!hdev->asic_prop.decoder_enabled_mask)
4675 		return;
4676 
4677 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
4678 		return;
4679 
4680 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4681 		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4682 			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4683 
4684 			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4685 				continue;
4686 
4687 			base_addr =  mmDCORE0_DEC0_CMD_BASE +
4688 					BRDG_CTRL_BLOCK_OFFSET +
4689 					dcore_id * DCORE_OFFSET +
4690 					dec_id * DCORE_VDEC_OFFSET;
4691 
4692 			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4693 
4694 			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4695 		}
4696 
4697 	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
4698 		dec_bit = PCIE_DEC_SHIFT + dec_id;
4699 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4700 			continue;
4701 
4702 		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
4703 				dec_id * DCORE_VDEC_OFFSET;
4704 
4705 		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
4706 
4707 		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
4708 	}
4709 }
4710 
4711 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
4712 					u32 stlb_base, u32 asid, u64 phys_addr)
4713 {
4714 	u32 status, timeout_usec;
4715 	int rc;
4716 
4717 	if (hdev->pldm || !hdev->pdev)
4718 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
4719 	else
4720 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4721 
4722 	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
4723 	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
4724 	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
4725 	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
4726 
4727 	rc = hl_poll_timeout(
4728 		hdev,
4729 		stlb_base + STLB_BUSY_OFFSET,
4730 		status,
4731 		!(status & 0x80000000),
4732 		1000,
4733 		timeout_usec);
4734 
4735 	if (rc) {
4736 		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
4737 		return rc;
4738 	}
4739 
4740 	return 0;
4741 }
4742 
4743 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
4744 					u32 start_offset, u32 inv_start_val,
4745 					u32 flags)
4746 {
4747 	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
4748 	if (flags & MMU_OP_CLEAR_MEMCACHE)
4749 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
4750 
4751 	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
4752 		return;
4753 
4754 	WREG32(stlb_base + start_offset, inv_start_val);
4755 }
4756 
4757 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
4758 						struct gaudi2_cache_invld_params *inv_params)
4759 {
4760 	u32 status, timeout_usec, start_offset;
4761 	int rc;
4762 
4763 	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
4764 					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
4765 
4766 	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
4767 	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
4768 		rc = hl_poll_timeout(
4769 			hdev,
4770 			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
4771 			status,
4772 			status & 0x1,
4773 			1000,
4774 			timeout_usec);
4775 
4776 		if (rc)
4777 			return rc;
4778 
4779 		/* Need to manually reset the status to 0 */
4780 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
4781 	}
4782 
4783 	/* Lower cache does not work with cache lines, hence we can skip its
4784 	 * invalidation upon map and invalidate only upon unmap
4785 	 */
4786 	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
4787 		return 0;
4788 
4789 	start_offset = inv_params->range_invalidation ?
4790 			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
4791 
4792 	rc = hl_poll_timeout(
4793 		hdev,
4794 		stlb_base + start_offset,
4795 		status,
4796 		!(status & 0x1),
4797 		1000,
4798 		timeout_usec);
4799 
4800 	return rc;
4801 }
4802 
4803 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
4804 {
4805 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4806 	u32 hw_cap;
4807 
4808 	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
4809 
4810 	if (gaudi2->hw_cap_initialized & hw_cap)
4811 		return true;
4812 
4813 	return false;
4814 }
4815 
4816 /* this function shall be called only for HMMUs for which capability bit is set */
4817 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
4818 {
4819 	u32 offset;
4820 
4821 	offset =  (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
4822 	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
4823 }
4824 
4825 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
4826 						struct gaudi2_cache_invld_params *inv_params)
4827 {
4828 	u32 start_offset;
4829 
4830 	if (inv_params->range_invalidation) {
4831 		/* Set the addresses range
4832 		 * Note: that the start address we set in register, is not included in
4833 		 * the range of the invalidation, by design.
4834 		 * that's why we need to set lower address than the one we actually
4835 		 * want to be included in the range invalidation.
4836 		 */
4837 		u64 start = inv_params->start_va - 1;
4838 
4839 		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
4840 
4841 		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
4842 				start >> MMU_RANGE_INV_VA_LSB_SHIFT);
4843 
4844 		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
4845 				start >> MMU_RANGE_INV_VA_MSB_SHIFT);
4846 
4847 		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
4848 				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
4849 
4850 		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
4851 				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
4852 	} else {
4853 		start_offset = STLB_INV_ALL_START_OFFSET;
4854 	}
4855 
4856 	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
4857 						inv_params->inv_start_val, inv_params->flags);
4858 }
4859 
4860 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
4861 						int dcore_id, int hmmu_id,
4862 						struct gaudi2_cache_invld_params *inv_params)
4863 {
4864 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4865 
4866 	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
4867 }
4868 
4869 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
4870 						int dcore_id, int hmmu_id,
4871 						struct gaudi2_cache_invld_params *inv_params)
4872 {
4873 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
4874 
4875 	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
4876 }
4877 
4878 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
4879 						struct gaudi2_cache_invld_params *inv_params)
4880 {
4881 	int dcore_id, hmmu_id;
4882 
4883 	/* first send all invalidation commands */
4884 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4885 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4886 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4887 				continue;
4888 
4889 			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
4890 		}
4891 	}
4892 
4893 	/* next, poll all invalidations status */
4894 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
4895 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
4896 			int rc;
4897 
4898 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
4899 				continue;
4900 
4901 			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
4902 										inv_params);
4903 			if (rc)
4904 				return rc;
4905 		}
4906 	}
4907 
4908 	return 0;
4909 }
4910 
4911 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
4912 {
4913 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4914 	struct gaudi2_cache_invld_params invld_params;
4915 	int rc = 0;
4916 
4917 	if (hdev->reset_info.hard_reset_pending)
4918 		return rc;
4919 
4920 	invld_params.range_invalidation = false;
4921 	invld_params.inv_start_val = 1;
4922 
4923 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
4924 		invld_params.flags = flags;
4925 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
4926 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
4927 										&invld_params);
4928 	} else if (flags & MMU_OP_PHYS_PACK) {
4929 		invld_params.flags = 0;
4930 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
4931 	}
4932 
4933 	return rc;
4934 }
4935 
4936 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
4937 				u32 flags, u32 asid, u64 va, u64 size)
4938 {
4939 	struct gaudi2_cache_invld_params invld_params = {0};
4940 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4941 	u64 start_va, end_va;
4942 	u32 inv_start_val;
4943 	int rc = 0;
4944 
4945 	if (hdev->reset_info.hard_reset_pending)
4946 		return 0;
4947 
4948 	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
4949 			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
4950 			asid << MMU_RANGE_INV_ASID_SHIFT);
4951 	start_va = va;
4952 	end_va = start_va + size;
4953 
4954 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
4955 		/* As range invalidation does not support zero address we will
4956 		 * do full invalidation in this case
4957 		 */
4958 		if (start_va) {
4959 			invld_params.range_invalidation = true;
4960 			invld_params.start_va = start_va;
4961 			invld_params.end_va = end_va;
4962 			invld_params.inv_start_val = inv_start_val;
4963 			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
4964 		} else {
4965 			invld_params.range_invalidation = false;
4966 			invld_params.inv_start_val = 1;
4967 			invld_params.flags = flags;
4968 		}
4969 
4970 
4971 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
4972 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
4973 										&invld_params);
4974 		if (rc)
4975 			return rc;
4976 
4977 	} else if (flags & MMU_OP_PHYS_PACK) {
4978 		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
4979 		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
4980 		invld_params.inv_start_val = inv_start_val;
4981 		invld_params.flags = flags;
4982 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
4983 	}
4984 
4985 	return rc;
4986 }
4987 
4988 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
4989 {
4990 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4991 	u64 hop0_addr;
4992 	u32 asid, max_asid = prop->max_asid;
4993 	int rc;
4994 
4995 	/* it takes too much time to init all of the ASIDs on palladium */
4996 	if (hdev->pldm)
4997 		max_asid = min((u32) 8, max_asid);
4998 
4999 	for (asid = 0 ; asid < max_asid ; asid++) {
5000 		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5001 		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5002 		if (rc) {
5003 			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5004 			return rc;
5005 		}
5006 	}
5007 
5008 	return 0;
5009 }
5010 
5011 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5012 {
5013 	u32 status, timeout_usec;
5014 	int rc;
5015 
5016 	if (hdev->pldm || !hdev->pdev)
5017 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5018 	else
5019 		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5020 
5021 	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5022 
5023 	rc = hl_poll_timeout(
5024 		hdev,
5025 		stlb_base + STLB_SRAM_INIT_OFFSET,
5026 		status,
5027 		!status,
5028 		1000,
5029 		timeout_usec);
5030 
5031 	if (rc)
5032 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5033 
5034 	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5035 	if (rc)
5036 		return rc;
5037 
5038 	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5039 
5040 	rc = hl_poll_timeout(
5041 		hdev,
5042 		stlb_base + STLB_INV_ALL_START_OFFSET,
5043 		status,
5044 		!status,
5045 		1000,
5046 		timeout_usec);
5047 
5048 	if (rc)
5049 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5050 
5051 	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5052 
5053 	return rc;
5054 }
5055 
5056 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5057 {
5058 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5059 	u32 mmu_base, stlb_base;
5060 	int rc;
5061 
5062 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5063 		return 0;
5064 
5065 	mmu_base = mmPMMU_HBW_MMU_BASE;
5066 	stlb_base = mmPMMU_HBW_STLB_BASE;
5067 
5068 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5069 		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5070 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5071 		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5072 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5073 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5074 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5075 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5076 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5077 		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5078 		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5079 
5080 	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5081 
5082 	if (PAGE_SIZE == SZ_64K) {
5083 		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5084 		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5085 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5086 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5087 			FIELD_PREP(
5088 				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5089 				1),
5090 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5091 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5092 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5093 	}
5094 
5095 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5096 
5097 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5098 	if (rc)
5099 		return rc;
5100 
5101 	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5102 
5103 	return 0;
5104 }
5105 
5106 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5107 				int hmmu_id)
5108 {
5109 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5110 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5111 	u32 offset, mmu_base, stlb_base, hw_cap;
5112 	u8 dmmu_seq;
5113 	int rc;
5114 
5115 	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5116 	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5117 
5118 	/*
5119 	 * return if DMMU is already initialized or if it's not out of
5120 	 * isolation (due to cluster binning)
5121 	 */
5122 	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5123 		return 0;
5124 
5125 	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5126 	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5127 	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5128 
5129 	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5130 			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5131 
5132 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5133 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5134 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5135 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5136 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5137 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5138 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5139 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5140 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5141 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5142 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5143 
5144 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5145 			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5146 
5147 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5148 
5149 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5150 	if (rc)
5151 		return rc;
5152 
5153 	gaudi2->hw_cap_initialized |= hw_cap;
5154 
5155 	return 0;
5156 }
5157 
5158 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5159 {
5160 	int rc, dcore_id, hmmu_id;
5161 
5162 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5163 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5164 			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5165 			if (rc)
5166 				return rc;
5167 		}
5168 
5169 	return 0;
5170 }
5171 
5172 static int gaudi2_mmu_init(struct hl_device *hdev)
5173 {
5174 	int rc;
5175 
5176 	rc = gaudi2_pci_mmu_init(hdev);
5177 	if (rc)
5178 		return rc;
5179 
5180 	rc = gaudi2_hbm_mmu_init(hdev);
5181 	if (rc)
5182 		return rc;
5183 
5184 	return 0;
5185 }
5186 
5187 static int gaudi2_hw_init(struct hl_device *hdev)
5188 {
5189 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5190 	int rc;
5191 
5192 	/* Let's mark in the H/W that we have reached this point. We check
5193 	 * this value in the reset_before_init function to understand whether
5194 	 * we need to reset the chip before doing H/W init. This register is
5195 	 * cleared by the H/W upon H/W reset
5196 	 */
5197 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5198 
5199 	/* Perform read from the device to make sure device is up */
5200 	RREG32(mmHW_STATE);
5201 
5202 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5203 	 * So we set it here and if anyone tries to move it later to
5204 	 * a different address, there will be an error
5205 	 */
5206 	if (hdev->asic_prop.iatu_done_by_fw)
5207 		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5208 
5209 	/*
5210 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
5211 	 * base address of dram
5212 	 */
5213 	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5214 		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5215 		return -EIO;
5216 	}
5217 
5218 	rc = gaudi2_init_cpu(hdev);
5219 	if (rc) {
5220 		dev_err(hdev->dev, "failed to initialize CPU\n");
5221 		return rc;
5222 	}
5223 
5224 	gaudi2_init_scrambler_hbm(hdev);
5225 	gaudi2_init_kdma(hdev);
5226 
5227 	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5228 	if (rc) {
5229 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5230 		return rc;
5231 	}
5232 
5233 	rc = gaudi2->cpucp_info_get(hdev);
5234 	if (rc) {
5235 		dev_err(hdev->dev, "Failed to get cpucp info\n");
5236 		return rc;
5237 	}
5238 
5239 	rc = gaudi2_mmu_init(hdev);
5240 	if (rc)
5241 		return rc;
5242 
5243 	gaudi2_init_pdma(hdev);
5244 	gaudi2_init_edma(hdev);
5245 	gaudi2_init_sm(hdev);
5246 	gaudi2_init_tpc(hdev);
5247 	gaudi2_init_mme(hdev);
5248 	gaudi2_init_rotator(hdev);
5249 	gaudi2_init_dec(hdev);
5250 	gaudi2_enable_timestamp(hdev);
5251 
5252 	rc = gaudi2_coresight_init(hdev);
5253 	if (rc)
5254 		goto disable_queues;
5255 
5256 	rc = gaudi2_enable_msix(hdev);
5257 	if (rc)
5258 		goto disable_queues;
5259 
5260 	/* Perform read from the device to flush all configuration */
5261 	RREG32(mmHW_STATE);
5262 
5263 	return 0;
5264 
5265 disable_queues:
5266 	gaudi2_disable_dma_qmans(hdev);
5267 	gaudi2_disable_mme_qmans(hdev);
5268 	gaudi2_disable_tpc_qmans(hdev);
5269 	gaudi2_disable_rot_qmans(hdev);
5270 	gaudi2_disable_nic_qmans(hdev);
5271 
5272 	gaudi2_disable_timestamp(hdev);
5273 
5274 	return rc;
5275 }
5276 
5277 /**
5278  * gaudi2_send_hard_reset_cmd - common function to handle reset
5279  *
5280  * @hdev: pointer to the habanalabs device structure
5281  *
5282  * This function handles the various possible scenarios for reset.
5283  * It considers if reset is handled by driver\FW and what FW components are loaded
5284  */
5285 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
5286 {
5287 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5288 	bool heartbeat_reset, preboot_only, cpu_initialized = false;
5289 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5290 	u32 cpu_boot_status;
5291 
5292 	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
5293 	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
5294 
5295 	/*
5296 	 * Handle corner case where failure was at cpu management app load,
5297 	 * and driver didn't detect any failure while loading the FW,
5298 	 * then at such scenario driver will send only HALT_MACHINE
5299 	 * and no one will respond to this request since FW already back to preboot
5300 	 * and it cannot handle such cmd.
5301 	 * In this case next time the management app loads it'll check on events register
5302 	 * which will still have the halt indication, and will reboot the device.
5303 	 * The solution is to let preboot clear all relevant registers before next boot
5304 	 * once driver send COMMS_RST_DEV.
5305 	 */
5306 	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
5307 
5308 	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
5309 			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
5310 		cpu_initialized = true;
5311 
5312 	/*
5313 	 * when Linux/Bootfit exist this write to the SP can be interpreted in 2 ways:
5314 	 * 1. FW reset: FW initiate the reset sequence
5315 	 * 2. driver reset: FW will start HALT sequence (the preparations for the
5316 	 *                  reset but not the reset itself as it is not implemented
5317 	 *                  on their part) and LKD will wait to let FW complete the
5318 	 *                  sequence before issuing the reset
5319 	 */
5320 	if (!preboot_only && cpu_initialized) {
5321 		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
5322 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
5323 
5324 		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
5325 	}
5326 
5327 	/*
5328 	 * When working with preboot (without Linux/Boot fit) we can
5329 	 * communicate only using the COMMS commands to issue halt/reset.
5330 	 *
5331 	 * For the case in which we are working with Linux/Bootfit this is a hail-mary
5332 	 * attempt to revive the card in the small chance that the f/w has
5333 	 * experienced a watchdog event, which caused it to return back to preboot.
5334 	 * In that case, triggering reset through GIC won't help. We need to
5335 	 * trigger the reset as if Linux wasn't loaded.
5336 	 *
5337 	 * We do it only if the reset cause was HB, because that would be the
5338 	 * indication of such an event.
5339 	 *
5340 	 * In case watchdog hasn't expired but we still got HB, then this won't
5341 	 * do any damage.
5342 	 */
5343 
5344 	if (heartbeat_reset || preboot_only || !cpu_initialized) {
5345 		if (hdev->asic_prop.hard_reset_done_by_fw)
5346 			hl_fw_ask_hard_reset_without_linux(hdev);
5347 		else
5348 			hl_fw_ask_halt_machine_without_linux(hdev);
5349 	}
5350 }
5351 
5352 /**
5353  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
5354  *
5355  * @hdev: pointer to the habanalabs device structure
5356  * @reset_sleep_ms: sleep time in msec after reset
5357  *
5358  * This function executes hard reset based on if driver/FW should do the reset
5359  */
5360 static void gaudi2_execute_hard_reset(struct hl_device *hdev, u32 reset_sleep_ms)
5361 {
5362 	if (hdev->asic_prop.hard_reset_done_by_fw) {
5363 		gaudi2_send_hard_reset_cmd(hdev);
5364 		return;
5365 	}
5366 
5367 	/* Set device to handle FLR by H/W as we will put the device
5368 	 * CPU to halt mode
5369 	 */
5370 	WREG32(mmPCIE_AUX_FLR_CTRL,
5371 			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
5372 
5373 	gaudi2_send_hard_reset_cmd(hdev);
5374 
5375 	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
5376 }
5377 
5378 /**
5379  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
5380  *
5381  * @hdev: pointer to the habanalabs device structure
5382  * @reset_sleep_ms: sleep time in msec after reset
5383  * @driver_performs_reset: true if driver should perform reset instead of f/w.
5384  *
5385  * This function executes soft reset based on if driver/FW should do the reset
5386  */
5387 static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms,
5388 						bool driver_performs_reset)
5389 {
5390 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5391 
5392 	if (!driver_performs_reset) {
5393 		/* set SP to indicate reset request sent to FW */
5394 		if (dyn_regs->cpu_rst_status)
5395 			WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
5396 		else
5397 			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
5398 
5399 		WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
5400 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
5401 		return;
5402 	}
5403 
5404 	/* Block access to engines, QMANs and SM during reset, these
5405 	 * RRs will be reconfigured after soft reset.
5406 	 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
5407 	 */
5408 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
5409 					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
5410 
5411 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
5412 				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
5413 				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
5414 
5415 	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
5416 }
5417 
5418 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 reset_sleep_ms,
5419 								u32 poll_timeout_us)
5420 {
5421 	int i, rc = 0;
5422 	u32 reg_val;
5423 
5424 	/* without this sleep reset will not work */
5425 	msleep(reset_sleep_ms);
5426 
5427 	/* We poll the BTM done indication multiple times after reset due to
5428 	 * a HW errata 'GAUDI2_0300'
5429 	 */
5430 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5431 		rc = hl_poll_timeout(
5432 			hdev,
5433 			mmPSOC_GLOBAL_CONF_BTM_FSM,
5434 			reg_val,
5435 			reg_val == 0,
5436 			1000,
5437 			poll_timeout_us);
5438 
5439 	if (rc)
5440 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
5441 }
5442 
5443 static void gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
5444 {
5445 	int i, rc = 0;
5446 	u32 reg_val;
5447 
5448 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
5449 		rc = hl_poll_timeout(
5450 			hdev,
5451 			mmCPU_RST_STATUS_TO_HOST,
5452 			reg_val,
5453 			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
5454 			1000,
5455 			poll_timeout_us);
5456 
5457 	if (rc)
5458 		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
5459 				reg_val);
5460 }
5461 
5462 static void gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
5463 {
5464 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5465 	u32 poll_timeout_us, reset_sleep_ms;
5466 	bool driver_performs_reset = false;
5467 
5468 	if (hdev->pldm) {
5469 		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
5470 						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
5471 		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
5472 	} else {
5473 		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
5474 		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
5475 	}
5476 
5477 	if (fw_reset)
5478 		goto skip_reset;
5479 
5480 	gaudi2_reset_arcs(hdev);
5481 
5482 	if (hard_reset) {
5483 		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
5484 		gaudi2_execute_hard_reset(hdev, reset_sleep_ms);
5485 	} else {
5486 		/*
5487 		 * As we have to support also work with preboot only (which does not supports
5488 		 * soft reset) we have to make sure that security is disabled before letting driver
5489 		 * do the reset. user shall control the BFE flags to avoid asking soft reset in
5490 		 * secured device with preboot only.
5491 		 */
5492 		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
5493 							!hdev->asic_prop.fw_security_enabled);
5494 		gaudi2_execute_soft_reset(hdev, reset_sleep_ms, driver_performs_reset);
5495 	}
5496 
5497 skip_reset:
5498 	if (driver_performs_reset || hard_reset)
5499 		/*
5500 		 * Instead of waiting for BTM indication we should wait for preboot ready:
5501 		 * Consider the below scenario:
5502 		 * 1. FW update is being triggered
5503 		 *        - setting the dirty bit
5504 		 * 2. hard reset will be triggered due to the dirty bit
5505 		 * 3. FW initiates the reset:
5506 		 *        - dirty bit cleared
5507 		 *        - BTM indication cleared
5508 		 *        - preboot ready indication cleared
5509 		 * 4. during hard reset:
5510 		 *        - BTM indication will be set
5511 		 *        - BIST test performed and another reset triggered
5512 		 * 5. only after this reset the preboot will set the preboot ready
5513 		 *
5514 		 * when polling on BTM indication alone we can lose sync with FW while trying to
5515 		 * communicate with FW that is during reset.
5516 		 * to overcome this we will always wait to preboot ready indication
5517 		 */
5518 		if ((hdev->fw_components & FW_TYPE_PREBOOT_CPU)) {
5519 			msleep(reset_sleep_ms);
5520 			hl_fw_wait_preboot_ready(hdev);
5521 		} else {
5522 			gaudi2_poll_btm_indication(hdev, reset_sleep_ms, poll_timeout_us);
5523 		}
5524 	else
5525 		gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
5526 
5527 	if (!gaudi2)
5528 		return;
5529 
5530 	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
5531 	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
5532 
5533 	/*
5534 	 * Clear NIC capability mask in order for driver to re-configure
5535 	 * NIC QMANs. NIC ports will not be re-configured during soft
5536 	 * reset as we call gaudi2_nic_init only during hard reset
5537 	 */
5538 	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
5539 
5540 	if (hard_reset) {
5541 		gaudi2->hw_cap_initialized &=
5542 			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
5543 			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
5544 			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
5545 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
5546 			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
5547 
5548 		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
5549 	} else {
5550 		gaudi2->hw_cap_initialized &=
5551 			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
5552 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
5553 			HW_CAP_ROT_MASK);
5554 	}
5555 }
5556 
5557 static int gaudi2_suspend(struct hl_device *hdev)
5558 {
5559 	int rc;
5560 
5561 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
5562 	if (rc)
5563 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
5564 
5565 	return rc;
5566 }
5567 
5568 static int gaudi2_resume(struct hl_device *hdev)
5569 {
5570 	return gaudi2_init_iatu(hdev);
5571 }
5572 
5573 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
5574 		void *cpu_addr, dma_addr_t dma_addr, size_t size)
5575 {
5576 	int rc;
5577 
5578 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
5579 			VM_DONTCOPY | VM_NORESERVE;
5580 
5581 #ifdef _HAS_DMA_MMAP_COHERENT
5582 
5583 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
5584 	if (rc)
5585 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
5586 
5587 #else
5588 
5589 	rc = remap_pfn_range(vma, vma->vm_start,
5590 				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
5591 				size, vma->vm_page_prot);
5592 	if (rc)
5593 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
5594 
5595 #endif
5596 
5597 	return rc;
5598 }
5599 
5600 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
5601 {
5602 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5603 	u64 hw_cap_mask = 0;
5604 	u64 hw_tpc_cap_bit = 0;
5605 	u64 hw_nic_cap_bit = 0;
5606 	u64 hw_test_cap_bit = 0;
5607 
5608 	switch (hw_queue_id) {
5609 	case GAUDI2_QUEUE_ID_PDMA_0_0:
5610 	case GAUDI2_QUEUE_ID_PDMA_0_1:
5611 	case GAUDI2_QUEUE_ID_PDMA_1_0:
5612 		hw_cap_mask = HW_CAP_PDMA_MASK;
5613 		break;
5614 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
5615 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
5616 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
5617 		break;
5618 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
5619 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
5620 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
5621 		break;
5622 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
5623 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
5624 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
5625 		break;
5626 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
5627 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
5628 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
5629 		break;
5630 
5631 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
5632 		hw_test_cap_bit = HW_CAP_MME_SHIFT;
5633 		break;
5634 
5635 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
5636 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
5637 		break;
5638 
5639 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
5640 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
5641 		break;
5642 
5643 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
5644 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
5645 		break;
5646 
5647 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
5648 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
5649 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
5650 
5651 		/* special case where cap bit refers to the first queue id */
5652 		if (!hw_tpc_cap_bit)
5653 			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
5654 		break;
5655 
5656 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
5657 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
5658 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
5659 		break;
5660 
5661 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
5662 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
5663 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
5664 		break;
5665 
5666 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
5667 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
5668 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
5669 		break;
5670 
5671 	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
5672 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
5673 		break;
5674 
5675 	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
5676 		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
5677 		break;
5678 
5679 	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
5680 		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
5681 
5682 		/* special case where cap bit refers to the first queue id */
5683 		if (!hw_nic_cap_bit)
5684 			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
5685 		break;
5686 
5687 	case GAUDI2_QUEUE_ID_CPU_PQ:
5688 		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
5689 
5690 	default:
5691 		return false;
5692 	}
5693 
5694 	if (hw_tpc_cap_bit)
5695 		return  !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
5696 
5697 	if (hw_nic_cap_bit)
5698 		return  !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
5699 
5700 	if (hw_test_cap_bit)
5701 		hw_cap_mask = BIT_ULL(hw_test_cap_bit);
5702 
5703 	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
5704 }
5705 
5706 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
5707 {
5708 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5709 
5710 	switch (arc_id) {
5711 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5712 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5713 		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
5714 
5715 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5716 		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5717 
5718 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5719 		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5720 
5721 	default:
5722 		return false;
5723 	}
5724 }
5725 
5726 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5727 {
5728 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5729 
5730 	switch (arc_id) {
5731 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5732 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5733 		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
5734 		break;
5735 
5736 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5737 		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
5738 		break;
5739 
5740 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5741 		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
5742 		break;
5743 
5744 	default:
5745 		return;
5746 	}
5747 }
5748 
5749 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
5750 {
5751 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5752 
5753 	switch (arc_id) {
5754 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
5755 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
5756 		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
5757 		break;
5758 
5759 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
5760 		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
5761 		break;
5762 
5763 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
5764 		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
5765 		break;
5766 
5767 	default:
5768 		return;
5769 	}
5770 }
5771 
5772 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
5773 {
5774 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5775 	u32 pq_offset, reg_base, db_reg_offset, db_value;
5776 
5777 	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
5778 		/*
5779 		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
5780 		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
5781 		 * number.
5782 		 */
5783 		pq_offset = (hw_queue_id & 0x3) * 4;
5784 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
5785 		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
5786 	} else {
5787 		db_reg_offset = mmCPU_IF_PF_PQ_PI;
5788 	}
5789 
5790 	db_value = pi;
5791 
5792 	/* ring the doorbell */
5793 	WREG32(db_reg_offset, db_value);
5794 
5795 	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
5796 		/* make sure device CPU will read latest data from host */
5797 		mb();
5798 		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
5799 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
5800 	}
5801 }
5802 
5803 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
5804 {
5805 	__le64 *pbd = (__le64 *) bd;
5806 
5807 	/* The QMANs are on the host memory so a simple copy suffice */
5808 	pqe[0] = pbd[0];
5809 	pqe[1] = pbd[1];
5810 }
5811 
5812 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
5813 				dma_addr_t *dma_handle, gfp_t flags)
5814 {
5815 	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
5816 }
5817 
5818 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
5819 				void *cpu_addr, dma_addr_t dma_handle)
5820 {
5821 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
5822 }
5823 
5824 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
5825 				u32 timeout, u64 *result)
5826 {
5827 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5828 
5829 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
5830 		if (result)
5831 			*result = 0;
5832 		return 0;
5833 	}
5834 
5835 	if (!timeout)
5836 		timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
5837 
5838 	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
5839 }
5840 
5841 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5842 				gfp_t mem_flags, dma_addr_t *dma_handle)
5843 {
5844 	if (size > GAUDI2_DMA_POOL_BLK_SIZE)
5845 		return NULL;
5846 
5847 	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5848 }
5849 
5850 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
5851 {
5852 	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
5853 }
5854 
5855 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
5856 						dma_addr_t *dma_handle)
5857 {
5858 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5859 }
5860 
5861 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
5862 {
5863 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5864 }
5865 
5866 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
5867 					enum dma_data_direction dir)
5868 {
5869 	dma_addr_t dma_addr;
5870 
5871 	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
5872 	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
5873 		return 0;
5874 
5875 	return dma_addr;
5876 }
5877 
5878 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
5879 					enum dma_data_direction dir)
5880 {
5881 	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
5882 }
5883 
5884 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
5885 {
5886 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5887 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5888 
5889 	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
5890 		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5891 		return -EINVAL;
5892 	}
5893 
5894 	/* Just check if CB address is valid */
5895 
5896 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5897 					parser->user_cb_size,
5898 					asic_prop->sram_user_base_address,
5899 					asic_prop->sram_end_address))
5900 		return 0;
5901 
5902 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5903 					parser->user_cb_size,
5904 					asic_prop->dram_user_base_address,
5905 					asic_prop->dram_end_address))
5906 		return 0;
5907 
5908 	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
5909 		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5910 						parser->user_cb_size,
5911 						asic_prop->dmmu.start_addr,
5912 						asic_prop->dmmu.end_addr))
5913 		return 0;
5914 
5915 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
5916 		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5917 					parser->user_cb_size,
5918 					asic_prop->pmmu.start_addr,
5919 					asic_prop->pmmu.end_addr) ||
5920 			hl_mem_area_inside_range(
5921 					(u64) (uintptr_t) parser->user_cb,
5922 					parser->user_cb_size,
5923 					asic_prop->pmmu_huge.start_addr,
5924 					asic_prop->pmmu_huge.end_addr))
5925 			return 0;
5926 
5927 	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
5928 		if (!hdev->pdev)
5929 			return 0;
5930 
5931 		if (!device_iommu_mapped(&hdev->pdev->dev))
5932 			return 0;
5933 	}
5934 
5935 	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
5936 		parser->user_cb, parser->user_cb_size);
5937 
5938 	return -EFAULT;
5939 }
5940 
5941 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5942 {
5943 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5944 
5945 	if (!parser->is_kernel_allocated_cb)
5946 		return gaudi2_validate_cb_address(hdev, parser);
5947 
5948 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5949 		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
5950 		return -EINVAL;
5951 	}
5952 
5953 	return 0;
5954 }
5955 
5956 static int gaudi2_send_heartbeat(struct hl_device *hdev)
5957 {
5958 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5959 
5960 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
5961 		return 0;
5962 
5963 	return hl_fw_send_heartbeat(hdev);
5964 }
5965 
5966 /* This is an internal helper function, used to update the KDMA mmu props.
5967  * Should be called with a proper kdma lock.
5968  */
5969 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
5970 					   bool mmu_bypass, u32 asid)
5971 {
5972 	u32 rw_asid, rw_mmu_bp;
5973 
5974 	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
5975 		      (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
5976 
5977 	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
5978 			(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
5979 
5980 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
5981 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
5982 }
5983 
5984 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
5985 						u32 mon_payload, u32 sync_value)
5986 {
5987 	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
5988 	u8 mask;
5989 
5990 	sob_offset = sob_id * 4;
5991 	mon_offset = mon_id * 4;
5992 
5993 	/* Reset the SOB value */
5994 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5995 
5996 	/* Configure this address with CQ_ID 0 because CQ_EN is set */
5997 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
5998 
5999 	/* Configure this address with CS index because CQ_EN is set */
6000 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6001 
6002 	sync_group_id = sob_id / 8;
6003 	mask = ~(1 << (sob_id & 0x7));
6004 	mode = 1; /* comparison mode is "equal to" */
6005 
6006 	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6007 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6008 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6009 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6010 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6011 }
6012 
6013 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */
6014 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6015 					u64 src_addr, u64 dst_addr,
6016 					u32 size, bool is_memset)
6017 {
6018 	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6019 	struct hl_cq_entry *cq_base;
6020 	struct hl_cq *cq;
6021 	u64 comp_addr;
6022 	int rc;
6023 
6024 	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6025 				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6026 				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6027 
6028 	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6029 			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6030 
6031 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6032 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6033 
6034 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6035 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6036 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6037 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6038 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6039 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6040 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6041 	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6042 
6043 	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6044 				FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6045 
6046 	if (is_memset)
6047 		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6048 
6049 	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6050 
6051 	/* Wait for completion */
6052 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6053 	cq_base = cq->kernel_address;
6054 	polling_addr = (u32 *)&cq_base[cq->ci];
6055 
6056 	if (hdev->pldm)
6057 		/* for each 1MB 20 second of timeout */
6058 		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6059 	else
6060 		timeout = KDMA_TIMEOUT_USEC;
6061 
6062 	/* Polling */
6063 	rc = hl_poll_timeout_memory(
6064 			hdev,
6065 			polling_addr,
6066 			status,
6067 			(status == 1),
6068 			1000,
6069 			timeout,
6070 			true);
6071 
6072 	*polling_addr = 0;
6073 
6074 	if (rc) {
6075 		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6076 		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6077 		return rc;
6078 	}
6079 
6080 	cq->ci = hl_cq_inc_ptr(cq->ci);
6081 
6082 	return 0;
6083 }
6084 
6085 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6086 {
6087 	u32 i;
6088 
6089 	for (i = 0 ; i < size ; i += sizeof(u32))
6090 		WREG32(addr + i, val);
6091 }
6092 
6093 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6094 {
6095 	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6096 
6097 	if (enable) {
6098 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6099 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6100 	} else {
6101 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6102 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6103 	}
6104 }
6105 
6106 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
6107 {
6108 	u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
6109 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6110 	u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a;
6111 	struct packet_msg_short *msg_short_pkt;
6112 	dma_addr_t pkt_dma_addr;
6113 	size_t pkt_size;
6114 	int rc;
6115 
6116 	if (hdev->pldm)
6117 		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6118 	else
6119 		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6120 
6121 	pkt_size = sizeof(*msg_short_pkt);
6122 	msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr);
6123 	if (!msg_short_pkt) {
6124 		dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n",
6125 			hw_queue_id);
6126 		return -ENOMEM;
6127 	}
6128 
6129 	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6130 		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6131 		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6132 		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6133 		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6134 
6135 	msg_short_pkt->value = cpu_to_le32(sob_val);
6136 	msg_short_pkt->ctl = cpu_to_le32(tmp);
6137 
6138 	/* Reset the SOB value */
6139 	WREG32(sob_addr, 0);
6140 
6141 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
6142 	if (rc) {
6143 		dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n",
6144 			hw_queue_id);
6145 		goto free_pkt;
6146 	}
6147 
6148 	rc = hl_poll_timeout(
6149 			hdev,
6150 			sob_addr,
6151 			tmp,
6152 			(tmp == sob_val),
6153 			1000,
6154 			timeout_usec);
6155 
6156 	if (rc == -ETIMEDOUT) {
6157 		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6158 			hw_queue_id, tmp);
6159 		rc = -EIO;
6160 	}
6161 
6162 	/* Reset the SOB value */
6163 	WREG32(sob_addr, 0);
6164 
6165 free_pkt:
6166 	hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr);
6167 	return rc;
6168 }
6169 
6170 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6171 {
6172 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6173 
6174 	/*
6175 	 * check capability here as send_cpu_message() won't update the result
6176 	 * value if no capability
6177 	 */
6178 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6179 		return 0;
6180 
6181 	return hl_fw_test_cpu_queue(hdev);
6182 }
6183 
6184 static int gaudi2_test_queues(struct hl_device *hdev)
6185 {
6186 	int i, rc, ret_val = 0;
6187 
6188 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6189 		if (!gaudi2_is_queue_enabled(hdev, i))
6190 			continue;
6191 
6192 		gaudi2_qman_set_test_mode(hdev, i, true);
6193 		rc = gaudi2_test_queue(hdev, i);
6194 		gaudi2_qman_set_test_mode(hdev, i, false);
6195 
6196 		if (rc) {
6197 			ret_val = -EINVAL;
6198 			goto done;
6199 		}
6200 	}
6201 
6202 	rc = gaudi2_test_cpu_queue(hdev);
6203 	if (rc) {
6204 		ret_val = -EINVAL;
6205 		goto done;
6206 	}
6207 
6208 done:
6209 	return ret_val;
6210 }
6211 
6212 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6213 {
6214 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6215 	size_t irq_arr_size;
6216 
6217 	/* TODO: missing gaudi2_nic_resume.
6218 	 * Until implemented nic_hw_cap_initialized will remain zeroed
6219 	 */
6220 	gaudi2_init_arcs(hdev);
6221 	gaudi2_scrub_arcs_dccm(hdev);
6222 	gaudi2_init_security(hdev);
6223 
6224 	/* Unmask all IRQs since some could have been received during the soft reset */
6225 	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
6226 	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
6227 }
6228 
6229 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
6230 					struct iterate_module_ctx *ctx)
6231 {
6232 	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
6233 	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
6234 	bool is_eng_idle;
6235 	int engine_idx;
6236 
6237 	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
6238 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
6239 	else
6240 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
6241 				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
6242 
6243 	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
6244 	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
6245 	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
6246 	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
6247 
6248 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6249 						IS_TPC_IDLE(tpc_cfg_sts);
6250 	*(idle_data->is_idle) &= is_eng_idle;
6251 
6252 	if (idle_data->mask && !is_eng_idle)
6253 		set_bit(engine_idx, idle_data->mask);
6254 
6255 	if (idle_data->e)
6256 		hl_engine_data_sprintf(idle_data->e,
6257 					idle_data->tpc_fmt, dcore, inst,
6258 					is_eng_idle ? "Y" : "N",
6259 					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6260 }
6261 
6262 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
6263 					struct engines_data *e)
6264 {
6265 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask,
6266 		mme_arch_sts, dec_swreg15, dec_enabled_bit;
6267 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6268 	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-12x%s\n";
6269 	unsigned long *mask = (unsigned long *) mask_arr;
6270 	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#x\n";
6271 	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
6272 	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
6273 	const char *pdma_fmt = "%-6d%-9s%#-14x%#x\n";
6274 	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
6275 	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
6276 	bool is_idle = true, is_eng_idle;
6277 	u64 offset;
6278 
6279 	struct gaudi2_tpc_idle_data tpc_idle_data = {
6280 		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
6281 		.e = e,
6282 		.mask = mask,
6283 		.is_idle = &is_idle,
6284 	};
6285 	struct iterate_module_ctx tpc_iter = {
6286 		.fn = &gaudi2_is_tpc_engine_idle,
6287 		.data = &tpc_idle_data,
6288 	};
6289 
6290 	int engine_idx, i, j;
6291 
6292 	/* EDMA, Two engines per Dcore */
6293 	if (e)
6294 		hl_engine_data_sprintf(e,
6295 			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6296 			"----  ----  -------  ------------  ----------------------\n");
6297 
6298 	for (i = 0; i < NUM_OF_DCORES; i++) {
6299 		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
6300 			int seq = i * NUM_OF_EDMA_PER_DCORE + j;
6301 
6302 			if (!(prop->edma_enabled_mask & BIT(seq)))
6303 				continue;
6304 
6305 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
6306 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6307 			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
6308 
6309 			dma_core_idle_ind_mask =
6310 			RREG32(mmDCORE0_EDMA0_CORE_IDLE_IND_MASK + offset);
6311 
6312 			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
6313 			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
6314 			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
6315 
6316 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6317 					IS_DMA_IDLE(dma_core_idle_ind_mask);
6318 			is_idle &= is_eng_idle;
6319 
6320 			if (mask && !is_eng_idle)
6321 				set_bit(engine_idx, mask);
6322 
6323 			if (e)
6324 				hl_engine_data_sprintf(e, edma_fmt, i, j,
6325 							is_eng_idle ? "Y" : "N",
6326 							qm_glbl_sts0,
6327 							dma_core_idle_ind_mask);
6328 		}
6329 	}
6330 
6331 	/* PDMA, Two engines in Full chip */
6332 	if (e)
6333 		hl_engine_data_sprintf(e,
6334 					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_IDLE_IND_MASK\n"
6335 					"----  -------  ------------  ----------------------\n");
6336 
6337 	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
6338 		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
6339 		offset = i * PDMA_OFFSET;
6340 		dma_core_idle_ind_mask = RREG32(mmPDMA0_CORE_IDLE_IND_MASK + offset);
6341 
6342 		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
6343 		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
6344 		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
6345 
6346 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6347 				IS_DMA_IDLE(dma_core_idle_ind_mask);
6348 		is_idle &= is_eng_idle;
6349 
6350 		if (mask && !is_eng_idle)
6351 			set_bit(engine_idx, mask);
6352 
6353 		if (e)
6354 			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
6355 						qm_glbl_sts0, dma_core_idle_ind_mask);
6356 	}
6357 
6358 	/* NIC, twelve macros in Full chip */
6359 	if (e && hdev->nic_ports_mask)
6360 		hl_engine_data_sprintf(e,
6361 					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
6362 					"---  -------  ------------  ----------\n");
6363 
6364 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6365 		if (!(i & 1))
6366 			offset = i / 2 * NIC_OFFSET;
6367 		else
6368 			offset += NIC_QM_OFFSET;
6369 
6370 		if (!(hdev->nic_ports_mask & BIT(i)))
6371 			continue;
6372 
6373 		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
6374 
6375 
6376 		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
6377 		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
6378 		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
6379 
6380 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6381 		is_idle &= is_eng_idle;
6382 
6383 		if (mask && !is_eng_idle)
6384 			set_bit(engine_idx, mask);
6385 
6386 		if (e)
6387 			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
6388 						qm_glbl_sts0, qm_cgm_sts);
6389 	}
6390 
6391 	if (e)
6392 		hl_engine_data_sprintf(e,
6393 					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
6394 					"---  ----  -------  ------------  ---------------\n");
6395 	/* MME, one per Dcore */
6396 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6397 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
6398 		offset = i * DCORE_OFFSET;
6399 
6400 		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
6401 		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
6402 		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
6403 
6404 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6405 		is_idle &= is_eng_idle;
6406 
6407 		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
6408 		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
6409 		is_idle &= is_eng_idle;
6410 
6411 		if (e)
6412 			hl_engine_data_sprintf(e, mme_fmt, i, "N",
6413 				is_eng_idle ? "Y" : "N",
6414 				qm_glbl_sts0,
6415 				mme_arch_sts);
6416 
6417 		if (mask && !is_eng_idle)
6418 			set_bit(engine_idx, mask);
6419 	}
6420 
6421 	/*
6422 	 * TPC
6423 	 */
6424 	if (e && prop->tpc_enabled_mask)
6425 		hl_engine_data_sprintf(e,
6426 			"\nCORE  TPC   is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_IDLE_IND_MASK\n"
6427 			"----  ---  --------  ------------  ----------  ----------------------\n");
6428 
6429 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
6430 
6431 	/* Decoders, two each Dcore and two shared PCIe decoders */
6432 	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
6433 		hl_engine_data_sprintf(e,
6434 			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
6435 			"----  ---  -------  ---------------\n");
6436 
6437 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
6438 		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
6439 			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
6440 			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
6441 				continue;
6442 
6443 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
6444 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6445 			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
6446 
6447 			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
6448 			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6449 			is_idle &= is_eng_idle;
6450 
6451 			if (mask && !is_eng_idle)
6452 				set_bit(engine_idx, mask);
6453 
6454 			if (e)
6455 				hl_engine_data_sprintf(e, dec_fmt, i, j,
6456 							is_eng_idle ? "Y" : "N", dec_swreg15);
6457 		}
6458 	}
6459 
6460 	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
6461 		hl_engine_data_sprintf(e,
6462 			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
6463 			"--------  -------  ---------------\n");
6464 
6465 	/* Check shared(PCIe) decoders */
6466 	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
6467 		dec_enabled_bit = PCIE_DEC_SHIFT + i;
6468 		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
6469 			continue;
6470 
6471 		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
6472 		offset = i * DCORE_DEC_OFFSET;
6473 		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
6474 		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
6475 		is_idle &= is_eng_idle;
6476 
6477 		if (mask && !is_eng_idle)
6478 			set_bit(engine_idx, mask);
6479 
6480 		if (e)
6481 			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
6482 						is_eng_idle ? "Y" : "N", dec_swreg15);
6483 	}
6484 
6485 	if (e)
6486 		hl_engine_data_sprintf(e,
6487 			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6488 			"----  ----  -------  ------------  ----------  -------------\n");
6489 
6490 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
6491 		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
6492 
6493 		offset = i * ROT_OFFSET;
6494 
6495 		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
6496 		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
6497 		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
6498 
6499 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
6500 		is_idle &= is_eng_idle;
6501 
6502 		if (mask && !is_eng_idle)
6503 			set_bit(engine_idx, mask);
6504 
6505 		if (e)
6506 			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
6507 					qm_glbl_sts0, qm_cgm_sts, "-");
6508 	}
6509 
6510 	return is_idle;
6511 }
6512 
6513 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
6514 	__acquires(&gaudi2->hw_queues_lock)
6515 {
6516 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6517 
6518 	spin_lock(&gaudi2->hw_queues_lock);
6519 }
6520 
6521 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
6522 	__releases(&gaudi2->hw_queues_lock)
6523 {
6524 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6525 
6526 	spin_unlock(&gaudi2->hw_queues_lock);
6527 }
6528 
6529 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
6530 {
6531 	return hdev->pdev->device;
6532 }
6533 
6534 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
6535 {
6536 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6537 
6538 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6539 		return 0;
6540 
6541 	return hl_fw_get_eeprom_data(hdev, data, max_size);
6542 }
6543 
6544 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
6545 {
6546 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
6547 }
6548 
6549 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
6550 {
6551 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6552 
6553 	if (aggregate) {
6554 		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
6555 		return gaudi2->events_stat_aggregate;
6556 	}
6557 
6558 	*size = (u32) sizeof(gaudi2->events_stat);
6559 	return gaudi2->events_stat;
6560 }
6561 
6562 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
6563 				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6564 {
6565 	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
6566 			dcore_vdec_id + DCORE_OFFSET * dcore_id;
6567 
6568 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6569 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6570 
6571 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6572 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6573 
6574 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6575 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6576 
6577 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6578 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6579 
6580 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6581 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6582 }
6583 
6584 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
6585 {
6586 	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6587 			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6588 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6589 	u32 dcore_offset = dcore_id * DCORE_OFFSET;
6590 	u32 vdec_id, i, ports_offset, reg_val;
6591 	u8 edma_seq_base;
6592 
6593 	/* EDMA */
6594 	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
6595 	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
6596 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6597 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6598 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6599 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6600 	}
6601 
6602 	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
6603 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6604 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6605 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
6606 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
6607 	}
6608 
6609 	/* Sync Mngr */
6610 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
6611 	/*
6612 	 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
6613 	 * for any access type
6614 	 */
6615 	if (dcore_id > 0) {
6616 		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
6617 			  (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
6618 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
6619 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
6620 	}
6621 
6622 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
6623 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
6624 
6625 	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
6626 		ports_offset = i * DCORE_MME_SBTE_OFFSET;
6627 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
6628 				dcore_offset + ports_offset, 0);
6629 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
6630 				dcore_offset + ports_offset, rw_asid);
6631 	}
6632 
6633 	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
6634 		ports_offset = i * DCORE_MME_WB_OFFSET;
6635 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
6636 				dcore_offset + ports_offset, 0);
6637 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
6638 				dcore_offset + ports_offset, rw_asid);
6639 	}
6640 
6641 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
6642 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
6643 
6644 	/*
6645 	 * Decoders
6646 	 */
6647 	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
6648 		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
6649 			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
6650 	}
6651 }
6652 
6653 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
6654 				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
6655 {
6656 	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
6657 
6658 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
6659 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
6660 
6661 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
6662 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
6663 
6664 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
6665 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
6666 
6667 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
6668 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
6669 
6670 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
6671 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
6672 }
6673 
6674 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
6675 							u32 rw_asid, u32 rw_mmu_bp)
6676 {
6677 	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
6678 
6679 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
6680 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
6681 }
6682 
6683 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
6684 {
6685 	u32 reg_base, reg_offset, reg_val = 0;
6686 
6687 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
6688 
6689 	/* Enable MMU and configure asid for all relevant ARC regions */
6690 	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
6691 	reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
6692 
6693 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
6694 	WREG32(reg_base + reg_offset, reg_val);
6695 
6696 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
6697 	WREG32(reg_base + reg_offset, reg_val);
6698 
6699 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
6700 	WREG32(reg_base + reg_offset, reg_val);
6701 
6702 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
6703 	WREG32(reg_base + reg_offset, reg_val);
6704 
6705 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
6706 	WREG32(reg_base + reg_offset, reg_val);
6707 
6708 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
6709 	WREG32(reg_base + reg_offset, reg_val);
6710 
6711 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
6712 	WREG32(reg_base + reg_offset, reg_val);
6713 
6714 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
6715 	WREG32(reg_base + reg_offset, reg_val);
6716 
6717 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
6718 	WREG32(reg_base + reg_offset, reg_val);
6719 
6720 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
6721 	WREG32(reg_base + reg_offset, reg_val);
6722 
6723 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
6724 	WREG32(reg_base + reg_offset, reg_val);
6725 }
6726 
6727 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
6728 {
6729 	int i;
6730 
6731 	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
6732 		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
6733 
6734 	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6735 		gaudi2_arc_mmu_prepare(hdev, i, asid);
6736 
6737 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
6738 		if (!gaudi2_is_queue_enabled(hdev, i))
6739 			continue;
6740 
6741 		gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
6742 	}
6743 
6744 	return 0;
6745 }
6746 
6747 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
6748 {
6749 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6750 	u32 rw_asid, offset;
6751 	int rc, i;
6752 
6753 	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
6754 			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
6755 
6756 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6757 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6758 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6759 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6760 
6761 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
6762 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
6763 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
6764 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
6765 
6766 	/* ROT */
6767 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
6768 		offset = i * ROT_OFFSET;
6769 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
6770 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6771 		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
6772 		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
6773 		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
6774 	}
6775 
6776 	/* Shared Decoders are the last bits in the decoders mask */
6777 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
6778 		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
6779 
6780 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
6781 		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
6782 
6783 	/* arc farm arc dup eng */
6784 	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
6785 		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
6786 
6787 	rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
6788 	if (rc)
6789 		return rc;
6790 
6791 	return 0;
6792 }
6793 
6794 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst,	u32 offset,
6795 					struct iterate_module_ctx *ctx)
6796 {
6797 	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
6798 
6799 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
6800 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
6801 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
6802 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
6803 }
6804 
6805 /* zero the MMUBP and set the ASID */
6806 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
6807 {
6808 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6809 	struct gaudi2_tpc_mmu_data tpc_mmu_data;
6810 	struct iterate_module_ctx tpc_iter = {
6811 		.fn = &gaudi2_tpc_mmu_prepare,
6812 		.data = &tpc_mmu_data,
6813 	};
6814 	int rc, i;
6815 
6816 	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
6817 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6818 		return -EINVAL;
6819 	}
6820 
6821 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
6822 		return 0;
6823 
6824 	rc = gaudi2_mmu_shared_prepare(hdev, asid);
6825 	if (rc)
6826 		return rc;
6827 
6828 	/* configure DCORE MMUs */
6829 	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6830 				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6831 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
6832 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
6833 		gaudi2_mmu_dcore_prepare(hdev, i, asid);
6834 
6835 	return 0;
6836 }
6837 
6838 static inline bool is_info_event(u32 event)
6839 {
6840 	switch (event) {
6841 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
6842 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
6843 
6844 	/* return in case of NIC status event - these events are received periodically and not as
6845 	 * an indication to an error.
6846 	 */
6847 	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
6848 		return true;
6849 	default:
6850 		return false;
6851 	}
6852 }
6853 
6854 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
6855 			bool ratelimited, const char *fmt, ...)
6856 {
6857 	struct va_format vaf;
6858 	va_list args;
6859 
6860 	va_start(args, fmt);
6861 	vaf.fmt = fmt;
6862 	vaf.va = &args;
6863 
6864 	if (ratelimited)
6865 		dev_err_ratelimited(hdev->dev, "%s: %pV\n",
6866 			gaudi2_irq_map_table[event_type].valid ?
6867 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
6868 	else
6869 		dev_err(hdev->dev, "%s: %pV\n",
6870 			gaudi2_irq_map_table[event_type].valid ?
6871 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
6872 
6873 	va_end(args);
6874 }
6875 
6876 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6877 		struct hl_eq_ecc_data *ecc_data)
6878 {
6879 	u64 ecc_address = 0, ecc_syndrom = 0;
6880 	u8 memory_wrapper_idx = 0;
6881 
6882 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
6883 	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6884 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6885 
6886 	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
6887 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.\n",
6888 		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
6889 
6890 	return !!ecc_data->is_critical;
6891 }
6892 
6893 /*
6894  * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6895  *
6896  * @idx: the current pi/ci value
6897  * @q_len: the queue length (power of 2)
6898  *
6899  * @return the cyclically decremented index
6900  */
6901 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
6902 {
6903 	u32 mask = q_len - 1;
6904 
6905 	/*
6906 	 * modular decrement is equivalent to adding (queue_size -1)
6907 	 * later we take LSBs to make sure the value is in the
6908 	 * range [0, queue_len - 1]
6909 	 */
6910 	return (idx + q_len - 1) & mask;
6911 }
6912 
6913 /**
6914  * gaudi2_print_sw_config_stream_data - print SW config stream data
6915  *
6916  * @hdev: pointer to the habanalabs device structure
6917  * @stream: the QMAN's stream
6918  * @qman_base: base address of QMAN registers block
6919  */
6920 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
6921 						u32 stream, u64 qman_base)
6922 {
6923 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6924 	u32 cq_ptr_lo_off, size;
6925 
6926 	cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
6927 
6928 	cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
6929 									stream * cq_ptr_lo_off;
6930 
6931 	cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6932 
6933 	cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
6934 
6935 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6936 	size = RREG32(cq_tsize);
6937 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
6938 		stream, cq_ptr, size);
6939 }
6940 
6941 /**
6942  * gaudi2_print_last_pqes_on_err - print last PQEs on error
6943  *
6944  * @hdev: pointer to the habanalabs device structure
6945  * @qid_base: first QID of the QMAN (out of 4 streams)
6946  * @stream: the QMAN's stream
6947  * @qman_base: base address of QMAN registers block
6948  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6949  */
6950 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
6951 						u64 qman_base, bool pr_sw_conf)
6952 {
6953 	u32 ci, qm_ci_stream_off;
6954 	struct hl_hw_queue *q;
6955 	u64 pq_ci;
6956 	int i;
6957 
6958 	q = &hdev->kernel_queues[qid_base + stream];
6959 
6960 	qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
6961 	pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
6962 						stream * qm_ci_stream_off;
6963 
6964 	hdev->asic_funcs->hw_queues_lock(hdev);
6965 
6966 	if (pr_sw_conf)
6967 		gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
6968 
6969 	ci = RREG32(pq_ci);
6970 
6971 	/* we should start printing form ci -1 */
6972 	ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6973 
6974 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6975 		struct hl_bd *bd;
6976 		u64 addr;
6977 		u32 len;
6978 
6979 		bd = q->kernel_address;
6980 		bd += ci;
6981 
6982 		len = le32_to_cpu(bd->len);
6983 		/* len 0 means uninitialized entry- break */
6984 		if (!len)
6985 			break;
6986 
6987 		addr = le64_to_cpu(bd->ptr);
6988 
6989 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
6990 			stream, ci, addr, len);
6991 
6992 		/* get previous ci, wrap if needed */
6993 		ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
6994 	}
6995 
6996 	hdev->asic_funcs->hw_queues_unlock(hdev);
6997 }
6998 
6999 /**
7000  * print_qman_data_on_err - extract QMAN data on error
7001  *
7002  * @hdev: pointer to the habanalabs device structure
7003  * @qid_base: first QID of the QMAN (out of 4 streams)
7004  * @stream: the QMAN's stream
7005  * @qman_base: base address of QMAN registers block
7006  *
7007  * This function attempt to extract as much data as possible on QMAN error.
7008  * On upper CP print the SW config stream data and last 8 PQEs.
7009  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
7010  */
7011 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
7012 {
7013 	u32 i;
7014 
7015 	if (stream != QMAN_STREAMS) {
7016 		gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
7017 		return;
7018 	}
7019 
7020 	gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7021 
7022 	for (i = 0 ; i < QMAN_STREAMS ; i++)
7023 		gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
7024 }
7025 
7026 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7027 							u64 qman_base, u32 qid_base)
7028 {
7029 	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7030 	u64 glbl_sts_addr, arb_err_addr;
7031 	char reg_desc[32];
7032 
7033 	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7034 	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7035 
7036 	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7037 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7038 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7039 
7040 		if (!glbl_sts_val)
7041 			continue;
7042 
7043 		if (i == QMAN_STREAMS) {
7044 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7045 			num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7046 		} else {
7047 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7048 			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7049 		}
7050 
7051 		for (j = 0 ; j < num_error_causes ; j++)
7052 			if (glbl_sts_val & BIT(j)) {
7053 				gaudi2_print_event(hdev, event_type, true,
7054 					"%s. err cause: %s", reg_desc,
7055 					i == QMAN_STREAMS ?
7056 					gaudi2_qman_lower_cp_error_cause[j] :
7057 					gaudi2_qman_error_cause[j]);
7058 				error_count++;
7059 			}
7060 
7061 		print_qman_data_on_err(hdev, qid_base, i, qman_base);
7062 	}
7063 
7064 	arb_err_val = RREG32(arb_err_addr);
7065 
7066 	if (!arb_err_val)
7067 		goto out;
7068 
7069 	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7070 		if (arb_err_val & BIT(j)) {
7071 			gaudi2_print_event(hdev, event_type, true,
7072 				"ARB_ERR. err cause: %s",
7073 				gaudi2_qman_arb_error_cause[j]);
7074 			error_count++;
7075 		}
7076 	}
7077 
7078 out:
7079 	return error_count;
7080 }
7081 
7082 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7083 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7084 			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
7085 			enum gaudi2_engine_id id, u64 *event_mask)
7086 {
7087 	u32 razwi_hi, razwi_lo, razwi_xy;
7088 	u16 eng_id = id;
7089 	u8 rd_wr_flag;
7090 
7091 	if (is_write) {
7092 		if (read_razwi_regs) {
7093 			razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7094 			razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7095 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7096 		} else {
7097 			razwi_hi = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_hi_reg);
7098 			razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg);
7099 			razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg);
7100 		}
7101 		rd_wr_flag = HL_RAZWI_WRITE;
7102 	} else {
7103 		if (read_razwi_regs) {
7104 			razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7105 			razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7106 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7107 		} else {
7108 			razwi_hi = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_hi_reg);
7109 			razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg);
7110 			razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg);
7111 		}
7112 		rd_wr_flag = HL_RAZWI_READ;
7113 	}
7114 
7115 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7116 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7117 
7118 	dev_err_ratelimited(hdev->dev,
7119 		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7120 		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7121 }
7122 
7123 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7124 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7125 			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
7126 			enum gaudi2_engine_id id, u64 *event_mask)
7127 {
7128 	u32 razwi_addr, razwi_xy;
7129 	u16 eng_id = id;
7130 	u8 rd_wr_flag;
7131 
7132 	if (is_write) {
7133 		if (read_razwi_regs) {
7134 			razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7135 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7136 		} else {
7137 			razwi_addr = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_reg);
7138 			razwi_xy = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_id_reg);
7139 		}
7140 
7141 		rd_wr_flag = HL_RAZWI_WRITE;
7142 	} else {
7143 		if (read_razwi_regs) {
7144 			razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7145 			razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7146 		} else {
7147 			razwi_addr = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_reg);
7148 			razwi_xy = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_id_reg);
7149 		}
7150 
7151 		rd_wr_flag = HL_RAZWI_READ;
7152 	}
7153 
7154 	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7155 	dev_err_ratelimited(hdev->dev,
7156 				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n",
7157 				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7158 						razwi_xy);
7159 }
7160 
7161 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7162 						enum razwi_event_sources module, u8 module_idx)
7163 {
7164 	switch (module) {
7165 	case RAZWI_TPC:
7166 		if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7167 			return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7168 		return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7169 				(module_idx % NUM_OF_TPC_PER_DCORE) +
7170 				(GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7171 
7172 	case RAZWI_MME:
7173 		return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
7174 			(module_idx * ENGINE_ID_DCORE_OFFSET));
7175 
7176 	case RAZWI_EDMA:
7177 		return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7178 			(module_idx % NUM_OF_EDMA_PER_DCORE));
7179 
7180 	case RAZWI_PDMA:
7181 		return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
7182 
7183 	case RAZWI_NIC:
7184 		return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
7185 
7186 	case RAZWI_DEC:
7187 		if (module_idx == 8)
7188 			return GAUDI2_PCIE_ENGINE_ID_DEC_0;
7189 
7190 		if (module_idx == 9)
7191 			return GAUDI2_PCIE_ENGINE_ID_DEC_1;
7192 					;
7193 		return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7194 				(module_idx % NUM_OF_DEC_PER_DCORE) +
7195 				(GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7196 
7197 	case RAZWI_ROT:
7198 		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
7199 
7200 	default:
7201 		return GAUDI2_ENGINE_ID_SIZE;
7202 	}
7203 }
7204 
7205 /*
7206  * This function handles RR(Range register) hit events.
7207  * raised be initiators not PSOC RAZWI.
7208  */
7209 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
7210 				enum razwi_event_sources module, u8 module_idx,
7211 				u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info,
7212 				u64 *event_mask)
7213 {
7214 	bool via_sft = false, read_razwi_regs = false;
7215 	u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id;
7216 	u64 rtr_mstr_if_base_addr;
7217 	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
7218 	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
7219 	char initiator_name[64];
7220 
7221 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX) || !razwi_info)
7222 		read_razwi_regs = true;
7223 
7224 	switch (module) {
7225 	case RAZWI_TPC:
7226 		rtr_id = gaudi2_tpc_initiator_rtr_id[module_idx];
7227 		sprintf(initiator_name, "TPC_%u", module_idx);
7228 		break;
7229 	case RAZWI_MME:
7230 		sprintf(initiator_name, "MME_%u", module_idx);
7231 		switch (module_sub_idx) {
7232 		case MME_WAP0:
7233 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
7234 			break;
7235 		case MME_WAP1:
7236 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
7237 			break;
7238 		case MME_WRITE:
7239 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
7240 			break;
7241 		case MME_READ:
7242 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
7243 			break;
7244 		case MME_SBTE0:
7245 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
7246 			break;
7247 		case MME_SBTE1:
7248 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
7249 			break;
7250 		case MME_SBTE2:
7251 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
7252 			break;
7253 		case MME_SBTE3:
7254 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
7255 			break;
7256 		case MME_SBTE4:
7257 			rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
7258 			break;
7259 		default:
7260 			return;
7261 		}
7262 		break;
7263 	case RAZWI_EDMA:
7264 		sft_id = gaudi2_edma_initiator_sft_id[module_idx].interface_id;
7265 		dcore_id = gaudi2_edma_initiator_sft_id[module_idx].dcore_id;
7266 		via_sft = true;
7267 		sprintf(initiator_name, "EDMA_%u", module_idx);
7268 		break;
7269 	case RAZWI_PDMA:
7270 		rtr_id = gaudi2_pdma_initiator_rtr_id[module_idx];
7271 		sprintf(initiator_name, "PDMA_%u", module_idx);
7272 		break;
7273 	case RAZWI_NIC:
7274 		rtr_id = gaudi2_nic_initiator_rtr_id[module_idx];
7275 		sprintf(initiator_name, "NIC_%u", module_idx);
7276 		break;
7277 	case RAZWI_DEC:
7278 		rtr_id = gaudi2_dec_initiator_rtr_id[module_idx];
7279 		sprintf(initiator_name, "DEC_%u", module_idx);
7280 		break;
7281 	case RAZWI_ROT:
7282 		rtr_id = gaudi2_rot_initiator_rtr_id[module_idx];
7283 		sprintf(initiator_name, "ROT_%u", module_idx);
7284 		break;
7285 	default:
7286 		return;
7287 	}
7288 
7289 	if (!read_razwi_regs) {
7290 		if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_HBW) {
7291 			hbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7292 								RAZWI_HAPPENED_AW;
7293 			hbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7294 								RAZWI_HAPPENED_AR;
7295 		} else if (le32_to_cpu(razwi_info->razwi_happened_mask) & RAZWI_HAPPENED_LBW) {
7296 			lbw_shrd_aw = le32_to_cpu(razwi_info->razwi_happened_mask) &
7297 								RAZWI_HAPPENED_AW;
7298 			lbw_shrd_ar = le32_to_cpu(razwi_info->razwi_happened_mask) &
7299 								RAZWI_HAPPENED_AR;
7300 		}
7301 		rtr_mstr_if_base_addr = 0;
7302 
7303 		goto dump_info;
7304 	}
7305 
7306 	/* Find router mstr_if register base */
7307 	if (via_sft) {
7308 		rtr_mstr_if_base_addr = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE +
7309 				dcore_id * SFT_DCORE_OFFSET +
7310 				sft_id * SFT_IF_OFFSET +
7311 				RTR_MSTR_IF_OFFSET;
7312 	} else {
7313 		dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7314 		dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7315 		rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
7316 				dcore_id * DCORE_OFFSET +
7317 				dcore_rtr_id * DCORE_RTR_OFFSET +
7318 				RTR_MSTR_IF_OFFSET;
7319 	}
7320 
7321 	/* Find out event cause by reading "RAZWI_HAPPENED" registers */
7322 	hbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
7323 
7324 	hbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
7325 
7326 	if (via_sft) {
7327 		/* SFT has separate MSTR_IF for LBW, only there we can
7328 		 * read the LBW razwi related registers
7329 		 */
7330 		u64 base;
7331 
7332 		base = mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE + dcore_id * SFT_DCORE_OFFSET +
7333 				RTR_LBW_MSTR_IF_OFFSET;
7334 
7335 		lbw_shrd_aw = RREG32(base + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7336 
7337 		lbw_shrd_ar = RREG32(base + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7338 	} else {
7339 		lbw_shrd_aw = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
7340 
7341 		lbw_shrd_ar = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
7342 	}
7343 
7344 dump_info:
7345 	/* check if there is no RR razwi indication at all */
7346 	if (!hbw_shrd_aw && !hbw_shrd_ar && !lbw_shrd_aw && !lbw_shrd_ar)
7347 		return;
7348 
7349 	eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
7350 	if (hbw_shrd_aw) {
7351 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7352 						initiator_name, read_razwi_regs, razwi_info,
7353 						eng_id, event_mask);
7354 
7355 		/* Clear event indication */
7356 		if (read_razwi_regs)
7357 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
7358 	}
7359 
7360 	if (hbw_shrd_ar) {
7361 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7362 						initiator_name, read_razwi_regs, razwi_info,
7363 						eng_id, event_mask);
7364 
7365 		/* Clear event indication */
7366 		if (read_razwi_regs)
7367 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
7368 	}
7369 
7370 	if (lbw_shrd_aw) {
7371 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
7372 						initiator_name, read_razwi_regs, razwi_info,
7373 						eng_id, event_mask);
7374 
7375 		/* Clear event indication */
7376 		if (read_razwi_regs)
7377 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
7378 	}
7379 
7380 	if (lbw_shrd_ar) {
7381 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
7382 						initiator_name, read_razwi_regs, razwi_info,
7383 						eng_id, event_mask);
7384 
7385 		/* Clear event indication */
7386 		if (read_razwi_regs)
7387 			WREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
7388 	}
7389 }
7390 
7391 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
7392 {
7393 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7394 	u8 mod_idx, sub_mod;
7395 
7396 	/* check all TPCs */
7397 	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
7398 		if (prop->tpc_enabled_mask & BIT(mod_idx))
7399 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL,
7400 								NULL);
7401 	}
7402 
7403 	/* check all MMEs */
7404 	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7405 		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
7406 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
7407 									sub_mod, NULL, NULL);
7408 
7409 	/* check all EDMAs */
7410 	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
7411 		if (prop->edma_enabled_mask & BIT(mod_idx))
7412 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL,
7413 								NULL);
7414 
7415 	/* check all PDMAs */
7416 	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
7417 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL,
7418 							NULL);
7419 
7420 	/* check all NICs */
7421 	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
7422 		if (hdev->nic_ports_mask & BIT(mod_idx))
7423 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
7424 								NULL, NULL);
7425 
7426 	/* check all DECs */
7427 	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
7428 		if (prop->decoder_enabled_mask & BIT(mod_idx))
7429 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL,
7430 								NULL);
7431 
7432 	/* check all ROTs */
7433 	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
7434 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL, NULL);
7435 }
7436 
7437 static const char *gaudi2_get_initiators_name(u32 rtr_id)
7438 {
7439 	switch (rtr_id) {
7440 	case DCORE0_RTR0:
7441 		return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU";
7442 	case DCORE0_RTR1:
7443 		return "TPC0/1";
7444 	case DCORE0_RTR2:
7445 		return "TPC2/3";
7446 	case DCORE0_RTR3:
7447 		return "TPC4/5";
7448 	case DCORE0_RTR4:
7449 		return "MME0_SBTE0/1";
7450 	case DCORE0_RTR5:
7451 		return "MME0_WAP0/SBTE2";
7452 	case DCORE0_RTR6:
7453 		return "MME0_CTRL_WR/SBTE3";
7454 	case DCORE0_RTR7:
7455 		return "MME0_WAP1/CTRL_RD/SBTE4";
7456 	case DCORE1_RTR0:
7457 		return "MME1_WAP1/CTRL_RD/SBTE4";
7458 	case DCORE1_RTR1:
7459 		return "MME1_CTRL_WR/SBTE3";
7460 	case DCORE1_RTR2:
7461 		return "MME1_WAP0/SBTE2";
7462 	case DCORE1_RTR3:
7463 		return "MME1_SBTE0/1";
7464 	case DCORE1_RTR4:
7465 		return "TPC10/11";
7466 	case DCORE1_RTR5:
7467 		return "TPC8/9";
7468 	case DCORE1_RTR6:
7469 		return "TPC6/7";
7470 	case DCORE1_RTR7:
7471 		return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7";
7472 	case DCORE2_RTR0:
7473 		return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0";
7474 	case DCORE2_RTR1:
7475 		return "TPC16/17";
7476 	case DCORE2_RTR2:
7477 		return "TPC14/15";
7478 	case DCORE2_RTR3:
7479 		return "TPC12/13";
7480 	case DCORE2_RTR4:
7481 		return "MME2_SBTE0/1";
7482 	case DCORE2_RTR5:
7483 		return "MME2_WAP0/SBTE2";
7484 	case DCORE2_RTR6:
7485 		return "MME2_CTRL_WR/SBTE3";
7486 	case DCORE2_RTR7:
7487 		return "MME2_WAP1/CTRL_RD/SBTE4";
7488 	case DCORE3_RTR0:
7489 		return "MME3_WAP1/CTRL_RD/SBTE4";
7490 	case DCORE3_RTR1:
7491 		return "MME3_CTRL_WR/SBTE3";
7492 	case DCORE3_RTR2:
7493 		return "MME3_WAP0/SBTE2";
7494 	case DCORE3_RTR3:
7495 		return "MME3_SBTE0/1";
7496 	case DCORE3_RTR4:
7497 		return "TPC18/19";
7498 	case DCORE3_RTR5:
7499 		return "TPC20/21";
7500 	case DCORE3_RTR6:
7501 		return "TPC22/23";
7502 	case DCORE3_RTR7:
7503 		return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC";
7504 	default:
7505 	return "N/A";
7506 	}
7507 }
7508 
7509 static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines)
7510 {
7511 	switch (rtr_id) {
7512 	case DCORE0_RTR0:
7513 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0;
7514 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1;
7515 		engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0;
7516 		engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1;
7517 		engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7518 		engines[5] = GAUDI2_ENGINE_ID_PDMA_0;
7519 		engines[6] = GAUDI2_ENGINE_ID_PDMA_1;
7520 		engines[7] = GAUDI2_ENGINE_ID_PCIE;
7521 		engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
7522 		engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
7523 		engines[10] = GAUDI2_ENGINE_ID_PSOC;
7524 		return 11;
7525 
7526 	case DCORE0_RTR1:
7527 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0;
7528 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1;
7529 		return 2;
7530 
7531 	case DCORE0_RTR2:
7532 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2;
7533 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3;
7534 		return 2;
7535 
7536 	case DCORE0_RTR3:
7537 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4;
7538 		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5;
7539 		return 2;
7540 
7541 	case DCORE0_RTR4:
7542 	case DCORE0_RTR5:
7543 	case DCORE0_RTR6:
7544 	case DCORE0_RTR7:
7545 		engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME;
7546 		return 1;
7547 
7548 	case DCORE1_RTR0:
7549 	case DCORE1_RTR1:
7550 	case DCORE1_RTR2:
7551 	case DCORE1_RTR3:
7552 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME;
7553 		return 1;
7554 
7555 	case DCORE1_RTR4:
7556 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4;
7557 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5;
7558 		return 2;
7559 
7560 	case DCORE1_RTR5:
7561 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2;
7562 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3;
7563 		return 2;
7564 
7565 	case DCORE1_RTR6:
7566 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0;
7567 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1;
7568 		return 2;
7569 
7570 	case DCORE1_RTR7:
7571 		engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0;
7572 		engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1;
7573 		engines[2] = GAUDI2_ENGINE_ID_NIC0_0;
7574 		engines[3] = GAUDI2_ENGINE_ID_NIC1_0;
7575 		engines[4] = GAUDI2_ENGINE_ID_NIC2_0;
7576 		engines[5] = GAUDI2_ENGINE_ID_NIC3_0;
7577 		engines[6] = GAUDI2_ENGINE_ID_NIC4_0;
7578 		engines[7] = GAUDI2_ENGINE_ID_ARC_FARM;
7579 		engines[8] = GAUDI2_ENGINE_ID_KDMA;
7580 		engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
7581 		engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
7582 		return 11;
7583 
7584 	case DCORE2_RTR0:
7585 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0;
7586 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1;
7587 		engines[2] = GAUDI2_ENGINE_ID_NIC5_0;
7588 		engines[3] = GAUDI2_ENGINE_ID_NIC6_0;
7589 		engines[4] = GAUDI2_ENGINE_ID_NIC7_0;
7590 		engines[5] = GAUDI2_ENGINE_ID_NIC8_0;
7591 		engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
7592 		engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
7593 		engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7594 		return 9;
7595 
7596 	case DCORE2_RTR1:
7597 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4;
7598 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5;
7599 		return 2;
7600 
7601 	case DCORE2_RTR2:
7602 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2;
7603 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3;
7604 		return 2;
7605 
7606 	case DCORE2_RTR3:
7607 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0;
7608 		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1;
7609 		return 2;
7610 
7611 	case DCORE2_RTR4:
7612 	case DCORE2_RTR5:
7613 	case DCORE2_RTR6:
7614 	case DCORE2_RTR7:
7615 		engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME;
7616 		return 1;
7617 	case DCORE3_RTR0:
7618 	case DCORE3_RTR1:
7619 	case DCORE3_RTR2:
7620 	case DCORE3_RTR3:
7621 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME;
7622 		return 1;
7623 	case DCORE3_RTR4:
7624 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0;
7625 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1;
7626 		return 2;
7627 	case DCORE3_RTR5:
7628 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2;
7629 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3;
7630 		return 2;
7631 	case DCORE3_RTR6:
7632 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4;
7633 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5;
7634 		return 2;
7635 	case DCORE3_RTR7:
7636 		engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0;
7637 		engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1;
7638 		engines[2] = GAUDI2_ENGINE_ID_NIC9_0;
7639 		engines[3] = GAUDI2_ENGINE_ID_NIC10_0;
7640 		engines[4] = GAUDI2_ENGINE_ID_NIC11_0;
7641 		engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
7642 		engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
7643 		engines[7] = GAUDI2_ENGINE_ID_ROT_1;
7644 		engines[8] = GAUDI2_ENGINE_ID_ROT_0;
7645 		return 9;
7646 	default:
7647 		return 0;
7648 	}
7649 }
7650 
7651 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7652 							u64 rtr_ctrl_base_addr, bool is_write,
7653 							u64 *event_mask)
7654 {
7655 	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7656 	u32 razwi_hi, razwi_lo;
7657 	u8 rd_wr_flag;
7658 
7659 	num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7660 
7661 	if (is_write) {
7662 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI);
7663 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO);
7664 		rd_wr_flag = HL_RAZWI_WRITE;
7665 
7666 		/* Clear set indication */
7667 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1);
7668 	} else {
7669 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI);
7670 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO);
7671 		rd_wr_flag = HL_RAZWI_READ;
7672 
7673 		/* Clear set indication */
7674 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
7675 	}
7676 
7677 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng,
7678 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7679 	dev_err_ratelimited(hdev->dev,
7680 		"RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
7681 		is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
7682 
7683 	dev_err_ratelimited(hdev->dev,
7684 		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7685 }
7686 
7687 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
7688 							u64 rtr_ctrl_base_addr, bool is_write,
7689 							u64 *event_mask)
7690 {
7691 	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
7692 	u32 razwi_addr;
7693 	u8 rd_wr_flag;
7694 
7695 	num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
7696 
7697 	if (is_write) {
7698 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
7699 		rd_wr_flag = HL_RAZWI_WRITE;
7700 
7701 		/* Clear set indication */
7702 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
7703 	} else {
7704 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
7705 		rd_wr_flag = HL_RAZWI_READ;
7706 
7707 		/* Clear set indication */
7708 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
7709 	}
7710 
7711 	hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW,
7712 			event_mask);
7713 	dev_err_ratelimited(hdev->dev,
7714 		"RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n",
7715 		is_write ? "WR" : "RD", rtr_id, razwi_addr);
7716 
7717 	dev_err_ratelimited(hdev->dev,
7718 		"Initiators: %s\n", gaudi2_get_initiators_name(rtr_id));
7719 }
7720 
7721 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
7722 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
7723 {
7724 	u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy,
7725 						razwi_mask_info, razwi_intr = 0, error_count = 0;
7726 	int rtr_map_arr_len = NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES;
7727 	u64 rtr_ctrl_base_addr;
7728 
7729 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
7730 		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
7731 		if (!razwi_intr)
7732 			return 0;
7733 	}
7734 
7735 	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
7736 	xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info);
7737 
7738 	dev_err_ratelimited(hdev->dev,
7739 		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
7740 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
7741 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
7742 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
7743 		xy,
7744 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
7745 
7746 	if (xy == 0) {
7747 		dev_err_ratelimited(hdev->dev,
7748 				"PSOC RAZWI interrupt: received event from 0 rtr coordinates\n");
7749 		goto clear;
7750 	}
7751 
7752 	/* Find router id by router coordinates */
7753 	for (rtr_id = 0 ; rtr_id < rtr_map_arr_len ; rtr_id++)
7754 		if (rtr_coordinates_to_rtr_id[rtr_id] == xy)
7755 			break;
7756 
7757 	if (rtr_id == rtr_map_arr_len) {
7758 		dev_err_ratelimited(hdev->dev,
7759 				"PSOC RAZWI interrupt: invalid rtr coordinates (0x%x)\n", xy);
7760 		goto clear;
7761 	}
7762 
7763 	/* Find router mstr_if register base */
7764 	dcore_id = rtr_id / NUM_OF_RTR_PER_DCORE;
7765 	dcore_rtr_id = rtr_id % NUM_OF_RTR_PER_DCORE;
7766 	rtr_ctrl_base_addr = mmDCORE0_RTR0_CTRL_BASE + dcore_id * DCORE_OFFSET +
7767 				dcore_rtr_id * DCORE_RTR_OFFSET;
7768 
7769 	hbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET);
7770 	hbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET);
7771 	lbw_aw_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET);
7772 	lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET);
7773 
7774 	if (hbw_aw_set)
7775 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7776 						rtr_ctrl_base_addr, true, event_mask);
7777 
7778 	if (hbw_ar_set)
7779 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
7780 						rtr_ctrl_base_addr, false, event_mask);
7781 
7782 	if (lbw_aw_set)
7783 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7784 						rtr_ctrl_base_addr, true, event_mask);
7785 
7786 	if (lbw_ar_set)
7787 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
7788 						rtr_ctrl_base_addr, false, event_mask);
7789 
7790 	error_count++;
7791 
7792 clear:
7793 	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
7794 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
7795 		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
7796 
7797 	return error_count;
7798 }
7799 
7800 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
7801 {
7802 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
7803 
7804 	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
7805 
7806 	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
7807 		if (sts_val & BIT(i)) {
7808 			gaudi2_print_event(hdev, event_type, true,
7809 				"err cause: %s", gaudi2_qm_sei_error_cause[i]);
7810 			sts_clr_val |= BIT(i);
7811 			error_count++;
7812 		}
7813 	}
7814 
7815 	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
7816 
7817 	return error_count;
7818 }
7819 
7820 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
7821 					struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
7822 {
7823 	enum razwi_event_sources module;
7824 	u32 error_count = 0;
7825 	u64 qman_base;
7826 	u8 index;
7827 
7828 	switch (event_type) {
7829 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
7830 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
7831 		qman_base = mmDCORE0_TPC0_QM_BASE +
7832 				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
7833 				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
7834 		module = RAZWI_TPC;
7835 		break;
7836 	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
7837 		qman_base = mmDCORE0_TPC6_QM_BASE;
7838 		module = RAZWI_TPC;
7839 		break;
7840 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
7841 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
7842 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
7843 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
7844 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
7845 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
7846 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
7847 		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
7848 		module = RAZWI_MME;
7849 		break;
7850 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
7851 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
7852 		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
7853 		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
7854 		module = RAZWI_PDMA;
7855 		break;
7856 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
7857 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
7858 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
7859 		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
7860 		module = RAZWI_ROT;
7861 		break;
7862 	default:
7863 		return 0;
7864 	}
7865 
7866 	error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
7867 
7868 	/* There is a single event per NIC macro, so should check its both QMAN blocks */
7869 	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
7870 			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
7871 		error_count += _gaudi2_handle_qm_sei_err(hdev,
7872 					qman_base + NIC_QM_OFFSET, event_type);
7873 
7874 	/* check if RAZWI happened */
7875 	if (razwi_info)
7876 		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info, event_mask);
7877 
7878 	return error_count;
7879 }
7880 
7881 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type)
7882 {
7883 	u32 qid_base, error_count = 0;
7884 	u64 qman_base;
7885 	u8 index;
7886 
7887 	switch (event_type) {
7888 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
7889 		index = event_type - GAUDI2_EVENT_TPC0_QM;
7890 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
7891 		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7892 		break;
7893 	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
7894 		index = event_type - GAUDI2_EVENT_TPC6_QM;
7895 		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
7896 		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7897 		break;
7898 	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
7899 		index = event_type - GAUDI2_EVENT_TPC12_QM;
7900 		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
7901 		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7902 		break;
7903 	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
7904 		index = event_type - GAUDI2_EVENT_TPC18_QM;
7905 		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
7906 		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
7907 		break;
7908 	case GAUDI2_EVENT_TPC24_QM:
7909 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
7910 		qman_base = mmDCORE0_TPC6_QM_BASE;
7911 		break;
7912 	case GAUDI2_EVENT_MME0_QM:
7913 		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
7914 		qman_base = mmDCORE0_MME_QM_BASE;
7915 		break;
7916 	case GAUDI2_EVENT_MME1_QM:
7917 		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
7918 		qman_base = mmDCORE1_MME_QM_BASE;
7919 		break;
7920 	case GAUDI2_EVENT_MME2_QM:
7921 		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
7922 		qman_base = mmDCORE2_MME_QM_BASE;
7923 		break;
7924 	case GAUDI2_EVENT_MME3_QM:
7925 		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
7926 		qman_base = mmDCORE3_MME_QM_BASE;
7927 		break;
7928 	case GAUDI2_EVENT_HDMA0_QM:
7929 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
7930 		qman_base = mmDCORE0_EDMA0_QM_BASE;
7931 		break;
7932 	case GAUDI2_EVENT_HDMA1_QM:
7933 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
7934 		qman_base = mmDCORE0_EDMA1_QM_BASE;
7935 		break;
7936 	case GAUDI2_EVENT_HDMA2_QM:
7937 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
7938 		qman_base = mmDCORE1_EDMA0_QM_BASE;
7939 		break;
7940 	case GAUDI2_EVENT_HDMA3_QM:
7941 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
7942 		qman_base = mmDCORE1_EDMA1_QM_BASE;
7943 		break;
7944 	case GAUDI2_EVENT_HDMA4_QM:
7945 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
7946 		qman_base = mmDCORE2_EDMA0_QM_BASE;
7947 		break;
7948 	case GAUDI2_EVENT_HDMA5_QM:
7949 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
7950 		qman_base = mmDCORE2_EDMA1_QM_BASE;
7951 		break;
7952 	case GAUDI2_EVENT_HDMA6_QM:
7953 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
7954 		qman_base = mmDCORE3_EDMA0_QM_BASE;
7955 		break;
7956 	case GAUDI2_EVENT_HDMA7_QM:
7957 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
7958 		qman_base = mmDCORE3_EDMA1_QM_BASE;
7959 		break;
7960 	case GAUDI2_EVENT_PDMA0_QM:
7961 		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
7962 		qman_base = mmPDMA0_QM_BASE;
7963 		break;
7964 	case GAUDI2_EVENT_PDMA1_QM:
7965 		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
7966 		qman_base = mmPDMA1_QM_BASE;
7967 		break;
7968 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
7969 		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
7970 		qman_base = mmROT0_QM_BASE;
7971 		break;
7972 	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
7973 		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
7974 		qman_base = mmROT1_QM_BASE;
7975 		break;
7976 	default:
7977 		return 0;
7978 	}
7979 
7980 	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
7981 
7982 	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
7983 	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM)
7984 		error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
7985 
7986 	return error_count;
7987 }
7988 
7989 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
7990 {
7991 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
7992 
7993 	sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS);
7994 
7995 	for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
7996 		if (sts_val & BIT(i)) {
7997 			gaudi2_print_event(hdev, event_type, true,
7998 				"err cause: %s", gaudi2_arc_sei_error_cause[i]);
7999 			sts_clr_val |= BIT(i);
8000 			error_count++;
8001 		}
8002 	}
8003 
8004 	WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val);
8005 
8006 	return error_count;
8007 }
8008 
8009 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
8010 {
8011 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8012 
8013 	sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8014 
8015 	for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8016 		if (sts_val & BIT(i)) {
8017 			gaudi2_print_event(hdev, event_type, true,
8018 				"err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8019 			sts_clr_val |= BIT(i);
8020 			error_count++;
8021 		}
8022 	}
8023 
8024 	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8025 
8026 	return error_count;
8027 }
8028 
8029 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8030 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8031 					u64 *event_mask)
8032 {
8033 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8034 	u32 error_count = 0;
8035 	int i;
8036 
8037 	for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8038 		if (intr_cause_data & BIT(i)) {
8039 			gaudi2_print_event(hdev, event_type, true,
8040 				"err cause: %s", guadi2_rot_error_cause[i]);
8041 			error_count++;
8042 		}
8043 
8044 	/* check if RAZWI happened */
8045 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0,
8046 						&razwi_with_intr_cause->razwi_info, event_mask);
8047 
8048 	return error_count;
8049 }
8050 
8051 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev,  u8 tpc_index, u16 event_type,
8052 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8053 					u64 *event_mask)
8054 {
8055 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8056 	u32 error_count = 0;
8057 	int i;
8058 
8059 	for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8060 		if (intr_cause_data & BIT(i)) {
8061 			gaudi2_print_event(hdev, event_type, true,
8062 				"interrupt cause: %s",  gaudi2_tpc_interrupts_cause[i]);
8063 			error_count++;
8064 		}
8065 
8066 	/* check if RAZWI happened */
8067 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0,
8068 						&razwi_with_intr_cause->razwi_info, event_mask);
8069 
8070 	return error_count;
8071 }
8072 
8073 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8074 				struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8075 {
8076 	u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8077 	int i;
8078 
8079 	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8080 		/* DCORE DEC */
8081 		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8082 				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8083 				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8084 	else
8085 		/* PCIE DEC */
8086 		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8087 				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8088 
8089 	sts_val = RREG32(sts_addr);
8090 
8091 	for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8092 		if (sts_val & BIT(i)) {
8093 			gaudi2_print_event(hdev, event_type, true,
8094 				"err cause: %s", gaudi2_dec_error_cause[i]);
8095 			sts_clr_val |= BIT(i);
8096 			error_count++;
8097 		}
8098 	}
8099 
8100 	/* check if RAZWI happened */
8101 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info,
8102 						event_mask);
8103 
8104 	/* Write 1 clear errors */
8105 	WREG32(sts_addr, sts_clr_val);
8106 
8107 	return error_count;
8108 }
8109 
8110 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8111 				struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8112 {
8113 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8114 	int i;
8115 
8116 	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8117 	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8118 
8119 	sts_val = RREG32(sts_addr);
8120 
8121 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8122 		if (sts_val & BIT(i)) {
8123 			gaudi2_print_event(hdev, event_type, true,
8124 				"err cause: %s", guadi2_mme_error_cause[i]);
8125 			sts_clr_val |= BIT(i);
8126 			error_count++;
8127 		}
8128 	}
8129 
8130 	/* check if RAZWI happened */
8131 	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8132 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info,
8133 							event_mask);
8134 
8135 	WREG32(sts_clr_addr, sts_clr_val);
8136 
8137 	return error_count;
8138 }
8139 
8140 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
8141 					u64 intr_cause_data)
8142 {
8143 	int i, error_count = 0;
8144 
8145 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8146 		if (intr_cause_data & BIT(i)) {
8147 			gaudi2_print_event(hdev, event_type, true,
8148 				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
8149 			error_count++;
8150 		}
8151 
8152 	return error_count;
8153 }
8154 
8155 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8156 					struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
8157 {
8158 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8159 	int i;
8160 
8161 	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8162 	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8163 
8164 	sts_val = RREG32(sts_addr);
8165 
8166 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8167 		if (sts_val & BIT(i)) {
8168 			gaudi2_print_event(hdev, event_type, true,
8169 				"err cause: %s", guadi2_mme_wap_error_cause[i]);
8170 			sts_clr_val |= BIT(i);
8171 			error_count++;
8172 		}
8173 	}
8174 
8175 	/* check if RAZWI happened on WAP0/1 */
8176 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info,
8177 						event_mask);
8178 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info,
8179 						event_mask);
8180 
8181 	WREG32(sts_clr_addr, sts_clr_val);
8182 
8183 	return error_count;
8184 }
8185 
8186 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8187 					u64 intr_cause_data)
8188 {
8189 	u32 error_count = 0;
8190 	int i;
8191 
8192 	/* If an AXI read or write error is received, an error is reported and
8193 	 * interrupt message is sent. Due to an HW errata, when reading the cause
8194 	 * register of the KDMA engine, the reported error is always HBW even if
8195 	 * the actual error caused by a LBW KDMA transaction.
8196 	 */
8197 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8198 		if (intr_cause_data & BIT(i)) {
8199 			gaudi2_print_event(hdev, event_type, true,
8200 				"err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8201 			error_count++;
8202 		}
8203 
8204 	return error_count;
8205 }
8206 
8207 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type,
8208 					u64 intr_cause_data)
8209 {
8210 	u32 error_count = 0;
8211 	int i;
8212 
8213 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8214 		if (intr_cause_data & BIT(i)) {
8215 			gaudi2_print_event(hdev, event_type, true,
8216 				"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8217 			error_count++;
8218 		}
8219 
8220 	return error_count;
8221 }
8222 
8223 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8224 {
8225 	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8226 
8227 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8228 	if (RREG32(razwi_happened_addr)) {
8229 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
8230 							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8231 		WREG32(razwi_happened_addr, 0x1);
8232 	}
8233 
8234 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8235 	if (RREG32(razwi_happened_addr)) {
8236 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
8237 							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8238 		WREG32(razwi_happened_addr, 0x1);
8239 	}
8240 
8241 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8242 	if (RREG32(razwi_happened_addr)) {
8243 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
8244 							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8245 		WREG32(razwi_happened_addr, 0x1);
8246 	}
8247 
8248 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8249 	if (RREG32(razwi_happened_addr)) {
8250 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
8251 							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
8252 		WREG32(razwi_happened_addr, 0x1);
8253 	}
8254 }
8255 
8256 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8257 					u64 intr_cause_data, u64 *event_mask)
8258 {
8259 	u32 error_count = 0;
8260 	int i;
8261 
8262 	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8263 		if (!(intr_cause_data & BIT_ULL(i)))
8264 			continue;
8265 
8266 		gaudi2_print_event(hdev, event_type, true,
8267 			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8268 		error_count++;
8269 
8270 		switch (intr_cause_data & BIT_ULL(i)) {
8271 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8272 			break;
8273 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8274 			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8275 			break;
8276 		}
8277 	}
8278 
8279 	return error_count;
8280 }
8281 
8282 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8283 				u64 intr_cause_data)
8284 
8285 {
8286 	u32 error_count = 0;
8287 	int i;
8288 
8289 	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8290 		if (intr_cause_data & BIT_ULL(i)) {
8291 			gaudi2_print_event(hdev, event_type, true,
8292 				"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8293 			error_count++;
8294 		}
8295 	}
8296 
8297 	return error_count;
8298 }
8299 
8300 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8301 {
8302 	u32 error_count = 0;
8303 	int i;
8304 
8305 	for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8306 		if (intr_cause_data & BIT_ULL(i)) {
8307 			gaudi2_print_event(hdev, event_type, true,
8308 				"err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8309 			error_count++;
8310 		}
8311 	}
8312 
8313 	return error_count;
8314 }
8315 
8316 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8317 					u64 *event_mask)
8318 {
8319 	u32 valid, val;
8320 	u64 addr;
8321 
8322 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8323 
8324 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8325 		return;
8326 
8327 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8328 	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8329 	addr <<= 32;
8330 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8331 
8332 	dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
8333 				is_pmmu ? "PMMU" : "HMMU", addr);
8334 	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8335 
8336 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0);
8337 }
8338 
8339 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8340 {
8341 	u32 valid, val;
8342 	u64 addr;
8343 
8344 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8345 
8346 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8347 		return;
8348 
8349 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8350 	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8351 	addr <<= 32;
8352 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8353 
8354 	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8355 				is_pmmu ? "PMMU" : "HMMU", addr);
8356 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0);
8357 }
8358 
8359 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
8360 						u64 mmu_base, bool is_pmmu, u64 *event_mask)
8361 {
8362 	u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
8363 	int i;
8364 
8365 	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8366 
8367 	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8368 		if (spi_sei_cause & BIT(i)) {
8369 			gaudi2_print_event(hdev, event_type, true,
8370 				"err cause: %s", gaudi2_mmu_spi_sei[i].cause);
8371 
8372 			if (i == 0)
8373 				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
8374 			else if (i == 1)
8375 				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8376 
8377 			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8378 				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8379 
8380 			error_count++;
8381 		}
8382 	}
8383 
8384 	/* Clear cause */
8385 	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8386 
8387 	/* Clear interrupt */
8388 	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8389 
8390 	return error_count;
8391 }
8392 
8393 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
8394 {
8395 	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
8396 		cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
8397 	int i;
8398 
8399 	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
8400 	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
8401 
8402 	sei_cause_val = RREG32(sei_cause_addr);
8403 	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
8404 	cq_intr_val = RREG32(cq_intr_addr);
8405 
8406 	/* SEI interrupt */
8407 	if (sei_cause_cause) {
8408 		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
8409 		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
8410 					sei_cause_val);
8411 
8412 		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
8413 			if (!(sei_cause_cause & BIT(i)))
8414 				continue;
8415 
8416 			gaudi2_print_event(hdev, event_type, true,
8417 				"err cause: %s. %s: 0x%X\n",
8418 				gaudi2_sm_sei_cause[i].cause_name,
8419 				gaudi2_sm_sei_cause[i].log_name,
8420 				sei_cause_log);
8421 			error_count++;
8422 			break;
8423 		}
8424 
8425 		/* Clear SM_SEI_CAUSE */
8426 		WREG32(sei_cause_addr, 0);
8427 	}
8428 
8429 	/* CQ interrupt */
8430 	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
8431 		cq_intr_queue_index =
8432 				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
8433 					cq_intr_val);
8434 
8435 		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
8436 				sm_index, cq_intr_queue_index);
8437 		error_count++;
8438 
8439 		/* Clear CQ_INTR */
8440 		WREG32(cq_intr_addr, 0);
8441 	}
8442 
8443 	return error_count;
8444 }
8445 
8446 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8447 {
8448 	bool is_pmmu = false;
8449 	u32 error_count = 0;
8450 	u64 mmu_base;
8451 	u8 index;
8452 
8453 	switch (event_type) {
8454 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
8455 		index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3;
8456 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8457 		break;
8458 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
8459 		index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP);
8460 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8461 		break;
8462 	case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
8463 		index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3;
8464 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8465 		break;
8466 	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
8467 		index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP);
8468 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8469 		break;
8470 	case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
8471 		index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3;
8472 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8473 		break;
8474 	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
8475 		index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP);
8476 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8477 		break;
8478 	case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
8479 		index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3;
8480 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8481 		break;
8482 	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
8483 		index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP);
8484 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8485 		break;
8486 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
8487 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
8488 		is_pmmu = true;
8489 		mmu_base = mmPMMU_HBW_MMU_BASE;
8490 		break;
8491 	default:
8492 		return 0;
8493 	}
8494 
8495 	error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
8496 							is_pmmu, event_mask);
8497 
8498 	return error_count;
8499 }
8500 
8501 
8502 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
8503 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
8504 			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
8505 {
8506 	u32 addr, beat, beat_shift;
8507 	bool rc = false;
8508 
8509 	dev_err_ratelimited(hdev->dev,
8510 			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
8511 			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
8512 			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
8513 			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
8514 
8515 	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
8516 	dev_err_ratelimited(hdev->dev,
8517 			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
8518 			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
8519 			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
8520 			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
8521 			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
8522 			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
8523 
8524 	/* For each beat (RDQS edge), look for possible errors and print relevant info */
8525 	for (beat = 0 ; beat < 4 ; beat++) {
8526 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8527 			(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
8528 			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
8529 						beat,
8530 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8531 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8532 
8533 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8534 			(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
8535 			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
8536 						beat,
8537 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8538 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
8539 			rc |= true;
8540 		}
8541 
8542 		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
8543 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8544 			(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
8545 			dev_err_ratelimited(hdev->dev,
8546 					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
8547 					beat,
8548 					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
8549 					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
8550 						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
8551 						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
8552 			rc |= true;
8553 		}
8554 
8555 		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
8556 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8557 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
8558 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
8559 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
8560 	}
8561 
8562 	return rc;
8563 }
8564 
8565 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
8566 			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
8567 {
8568 	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
8569 	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
8570 
8571 	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
8572 
8573 	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
8574 				derr & 0x3, derr & 0xc);
8575 
8576 	/* JIRA H6-3286 - the following prints may not be valid */
8577 	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
8578 	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
8579 		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
8580 		dev_err_ratelimited(hdev->dev,
8581 				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
8582 				i,
8583 				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
8584 				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
8585 				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
8586 				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
8587 	}
8588 }
8589 
8590 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
8591 		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
8592 {
8593 	__le32 *col_cmd = ca_par_err_data->dbg_col;
8594 	__le16 *row_cmd = ca_par_err_data->dbg_row;
8595 	u32 i;
8596 
8597 	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
8598 
8599 	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
8600 	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
8601 		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
8602 			le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
8603 			le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
8604 }
8605 
8606 /* Returns true if hard reset is needed or false otherwise */
8607 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
8608 					struct hl_eq_hbm_sei_data *sei_data)
8609 {
8610 	bool require_hard_reset = false;
8611 	u32 hbm_id, mc_id, cause_idx;
8612 
8613 	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
8614 	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
8615 
8616 	cause_idx = sei_data->hdr.sei_cause;
8617 	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
8618 		gaudi2_print_event(hdev, event_type, true,
8619 			"err cause: %s",
8620 			"Invalid HBM SEI event cause (%d) provided by FW\n", cause_idx);
8621 		return true;
8622 	}
8623 
8624 	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
8625 		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n",
8626 		sei_data->hdr.is_critical ? "Critical" : "Non-critical",
8627 		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
8628 		hbm_mc_sei_cause[cause_idx]);
8629 
8630 	/* Print error-specific info */
8631 	switch (cause_idx) {
8632 	case HBM_SEI_CATTRIP:
8633 		require_hard_reset = true;
8634 		break;
8635 
8636 	case  HBM_SEI_CMD_PARITY_EVEN:
8637 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
8638 						le32_to_cpu(sei_data->hdr.cnt));
8639 		require_hard_reset = true;
8640 		break;
8641 
8642 	case  HBM_SEI_CMD_PARITY_ODD:
8643 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
8644 						le32_to_cpu(sei_data->hdr.cnt));
8645 		require_hard_reset = true;
8646 		break;
8647 
8648 	case HBM_SEI_WRITE_DATA_PARITY_ERR:
8649 		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
8650 						le32_to_cpu(sei_data->hdr.cnt));
8651 		require_hard_reset = true;
8652 		break;
8653 
8654 	case HBM_SEI_READ_ERR:
8655 		/* Unlike other SEI events, read error requires further processing of the
8656 		 * raw data in order to determine the root cause.
8657 		 */
8658 		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
8659 								&sei_data->read_err_info,
8660 								le32_to_cpu(sei_data->hdr.cnt));
8661 		break;
8662 
8663 	default:
8664 		break;
8665 	}
8666 
8667 	require_hard_reset |= !!sei_data->hdr.is_critical;
8668 
8669 	return require_hard_reset;
8670 }
8671 
8672 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
8673 				u64 intr_cause_data)
8674 {
8675 	if (intr_cause_data) {
8676 		gaudi2_print_event(hdev, event_type, true,
8677 			"temperature error cause: %#llx", intr_cause_data);
8678 		return 1;
8679 	}
8680 
8681 	return 0;
8682 }
8683 
8684 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
8685 {
8686 	u32 i, error_count = 0;
8687 
8688 	for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
8689 		if (intr_cause_data & hbm_mc_spi[i].mask) {
8690 			dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
8691 				hbm_mc_spi[i].cause);
8692 			error_count++;
8693 		}
8694 
8695 	return error_count;
8696 }
8697 
8698 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8699 {
8700 	ktime_t zero_time = ktime_set(0, 0);
8701 
8702 	mutex_lock(&hdev->clk_throttling.lock);
8703 
8704 	switch (event_type) {
8705 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
8706 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
8707 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
8708 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
8709 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
8710 		dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
8711 		break;
8712 
8713 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
8714 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
8715 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
8716 		dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n");
8717 		break;
8718 
8719 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
8720 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
8721 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
8722 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
8723 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
8724 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8725 		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
8726 		break;
8727 
8728 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
8729 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
8730 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
8731 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8732 		dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
8733 		break;
8734 
8735 	default:
8736 		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
8737 		break;
8738 	}
8739 
8740 	mutex_unlock(&hdev->clk_throttling.lock);
8741 }
8742 
8743 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
8744 					struct cpucp_pkt_sync_err *sync_err)
8745 {
8746 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8747 
8748 	gaudi2_print_event(hdev, event_type, false,
8749 		"FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
8750 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
8751 		q->pi, atomic_read(&q->ci));
8752 }
8753 
8754 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
8755 {
8756 	u32 p2p_intr, msix_gw_intr, error_count = 0;
8757 
8758 	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
8759 	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
8760 
8761 	if (p2p_intr) {
8762 		gaudi2_print_event(hdev, event_type, true,
8763 			"pcie p2p transaction terminated due to security, req_id(0x%x)\n",
8764 			RREG32(mmPCIE_WRAP_P2P_REQ_ID));
8765 
8766 		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
8767 		error_count++;
8768 	}
8769 
8770 	if (msix_gw_intr) {
8771 		gaudi2_print_event(hdev, event_type, true,
8772 			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)\n",
8773 			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
8774 
8775 		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
8776 		error_count++;
8777 	}
8778 
8779 	return error_count;
8780 }
8781 
8782 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
8783 			struct hl_eq_pcie_drain_ind_data *drain_data)
8784 {
8785 	u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
8786 
8787 	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
8788 	lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
8789 	lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
8790 	hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
8791 	hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
8792 
8793 	if (cause & BIT_ULL(0)) {
8794 		dev_err_ratelimited(hdev->dev,
8795 			"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
8796 			!!lbw_rd, !!lbw_wr);
8797 		error_count++;
8798 	}
8799 
8800 	if (cause & BIT_ULL(1)) {
8801 		dev_err_ratelimited(hdev->dev,
8802 			"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
8803 			hbw_rd, hbw_wr);
8804 		error_count++;
8805 	}
8806 
8807 	return error_count;
8808 }
8809 
8810 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
8811 {
8812 	u32 error_count = 0;
8813 	int i;
8814 
8815 	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
8816 		if (intr_cause_data & BIT_ULL(i)) {
8817 			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
8818 				gaudi2_psoc_axi_drain_interrupts_cause[i]);
8819 			error_count++;
8820 		}
8821 	}
8822 
8823 	return error_count;
8824 }
8825 
8826 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
8827 					struct cpucp_pkt_sync_err *sync_err)
8828 {
8829 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
8830 
8831 	gaudi2_print_event(hdev, event_type, false,
8832 		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
8833 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
8834 }
8835 
8836 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
8837 					struct hl_eq_engine_arc_intr_data *data)
8838 {
8839 	struct hl_engine_arc_dccm_queue_full_irq *q;
8840 	u32 intr_type, engine_id;
8841 	u64 payload;
8842 
8843 	intr_type = le32_to_cpu(data->intr_type);
8844 	engine_id = le32_to_cpu(data->engine_id);
8845 	payload = le64_to_cpu(data->payload);
8846 
8847 	switch (intr_type) {
8848 	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
8849 		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
8850 
8851 		gaudi2_print_event(hdev, event_type, true,
8852 				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u\n",
8853 				engine_id, intr_type, q->queue_index);
8854 		return 1;
8855 	default:
8856 		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type\n");
8857 		return 0;
8858 	}
8859 }
8860 
8861 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
8862 {
8863 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
8864 	bool reset_required = false, is_critical = false;
8865 	u32 index, ctl, reset_flags = HL_DRV_RESET_HARD, error_count = 0;
8866 	u64 event_mask = 0;
8867 	u16 event_type;
8868 
8869 	ctl = le32_to_cpu(eq_entry->hdr.ctl);
8870 	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
8871 
8872 	if (event_type >= GAUDI2_EVENT_SIZE) {
8873 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
8874 				event_type, GAUDI2_EVENT_SIZE - 1);
8875 		return;
8876 	}
8877 
8878 	gaudi2->events_stat[event_type]++;
8879 	gaudi2->events_stat_aggregate[event_type]++;
8880 
8881 	switch (event_type) {
8882 	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
8883 		fallthrough;
8884 	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
8885 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8886 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8887 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8888 		is_critical = eq_entry->ecc_data.is_critical;
8889 		error_count++;
8890 		break;
8891 
8892 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
8893 		fallthrough;
8894 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8895 		fallthrough;
8896 	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
8897 		error_count = gaudi2_handle_qman_err(hdev, event_type);
8898 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8899 		break;
8900 
8901 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
8902 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8903 		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
8904 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8905 		break;
8906 
8907 	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
8908 		error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
8909 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
8910 		break;
8911 
8912 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8913 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8914 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
8915 		error_count = gaudi2_handle_qm_sei_err(hdev, event_type,
8916 					&eq_entry->razwi_info, &event_mask);
8917 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8918 		break;
8919 
8920 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8921 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8922 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8923 		error_count = gaudi2_handle_rot_err(hdev, index, event_type,
8924 					&eq_entry->razwi_with_intr_cause, &event_mask);
8925 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
8926 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8927 		break;
8928 
8929 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8930 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8931 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
8932 						&eq_entry->razwi_with_intr_cause, &event_mask);
8933 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
8934 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8935 		break;
8936 
8937 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
8938 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
8939 		error_count = gaudi2_handle_dec_err(hdev, index, event_type,
8940 						&eq_entry->razwi_info, &event_mask);
8941 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8942 		break;
8943 
8944 	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
8945 	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
8946 	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
8947 	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
8948 	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
8949 	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
8950 	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
8951 	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
8952 	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
8953 	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
8954 	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
8955 	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
8956 	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
8957 	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
8958 	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
8959 	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
8960 	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
8961 	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
8962 	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
8963 	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
8964 	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
8965 	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
8966 	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
8967 	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
8968 	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
8969 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
8970 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
8971 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
8972 					&eq_entry->razwi_with_intr_cause, &event_mask);
8973 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8974 		break;
8975 
8976 	case GAUDI2_EVENT_DEC0_SPI:
8977 	case GAUDI2_EVENT_DEC1_SPI:
8978 	case GAUDI2_EVENT_DEC2_SPI:
8979 	case GAUDI2_EVENT_DEC3_SPI:
8980 	case GAUDI2_EVENT_DEC4_SPI:
8981 	case GAUDI2_EVENT_DEC5_SPI:
8982 	case GAUDI2_EVENT_DEC6_SPI:
8983 	case GAUDI2_EVENT_DEC7_SPI:
8984 	case GAUDI2_EVENT_DEC8_SPI:
8985 	case GAUDI2_EVENT_DEC9_SPI:
8986 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
8987 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
8988 		error_count = gaudi2_handle_dec_err(hdev, index, event_type,
8989 					&eq_entry->razwi_info, &event_mask);
8990 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
8991 		break;
8992 
8993 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8994 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8995 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8996 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8997 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8998 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8999 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9000 		error_count = gaudi2_handle_mme_err(hdev, index, event_type,
9001 				&eq_entry->razwi_info, &event_mask);
9002 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
9003 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9004 		break;
9005 
9006 	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9007 	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9008 	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9009 	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9010 		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9011 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9012 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9013 		error_count = gaudi2_handle_mme_err(hdev, index, event_type,
9014 					&eq_entry->razwi_info, &event_mask);
9015 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9016 		break;
9017 
9018 	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9019 	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9020 	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9021 	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9022 		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9023 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9024 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9025 		error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type,
9026 					&eq_entry->razwi_info, &event_mask);
9027 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9028 		break;
9029 
9030 	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9031 	case GAUDI2_EVENT_KDMA0_CORE:
9032 		error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9033 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9034 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9035 		break;
9036 
9037 	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9038 		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9039 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9040 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9041 		break;
9042 
9043 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9044 		error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9045 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9046 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9047 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9048 		break;
9049 
9050 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9051 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9052 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9053 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9054 		error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9055 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9056 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9057 		break;
9058 
9059 	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9060 		error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9061 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9062 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9063 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9064 		break;
9065 
9066 	case GAUDI2_EVENT_PMMU_FATAL_0:
9067 		error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9068 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9069 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9070 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9071 		break;
9072 
9073 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9074 		error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9075 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9076 		break;
9077 
9078 	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9079 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9080 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9081 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9082 			reset_required = true;
9083 		}
9084 		error_count++;
9085 		break;
9086 
9087 	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9088 		error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9089 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9090 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9091 		break;
9092 
9093 	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9094 		error_count = gaudi2_handle_hbm_mc_spi(hdev,
9095 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9096 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9097 		break;
9098 
9099 	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
9100 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
9101 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9102 		break;
9103 
9104 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
9105 		error_count = gaudi2_handle_psoc_drain(hdev,
9106 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9107 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9108 		break;
9109 
9110 	case GAUDI2_EVENT_CPU_AXI_ECC:
9111 		error_count = GAUDI2_NA_EVENT_CAUSE;
9112 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9113 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9114 		break;
9115 	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
9116 		error_count = GAUDI2_NA_EVENT_CAUSE;
9117 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9118 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9119 		break;
9120 	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9121 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9122 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9123 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
9124 		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
9125 						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9126 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9127 		break;
9128 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9129 		error_count = GAUDI2_NA_EVENT_CAUSE;
9130 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9131 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9132 		break;
9133 	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9134 		error_count = GAUDI2_NA_EVENT_CAUSE;
9135 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9136 		break;
9137 	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9138 		error_count = GAUDI2_NA_EVENT_CAUSE;
9139 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9140 		break;
9141 	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9142 		error_count = GAUDI2_NA_EVENT_CAUSE;
9143 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9144 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9145 		break;
9146 	case GAUDI2_EVENT_PCIE_FATAL_ERR:
9147 		error_count = GAUDI2_NA_EVENT_CAUSE;
9148 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9149 		break;
9150 	case GAUDI2_EVENT_TPC0_BMON_SPMU:
9151 	case GAUDI2_EVENT_TPC1_BMON_SPMU:
9152 	case GAUDI2_EVENT_TPC2_BMON_SPMU:
9153 	case GAUDI2_EVENT_TPC3_BMON_SPMU:
9154 	case GAUDI2_EVENT_TPC4_BMON_SPMU:
9155 	case GAUDI2_EVENT_TPC5_BMON_SPMU:
9156 	case GAUDI2_EVENT_TPC6_BMON_SPMU:
9157 	case GAUDI2_EVENT_TPC7_BMON_SPMU:
9158 	case GAUDI2_EVENT_TPC8_BMON_SPMU:
9159 	case GAUDI2_EVENT_TPC9_BMON_SPMU:
9160 	case GAUDI2_EVENT_TPC10_BMON_SPMU:
9161 	case GAUDI2_EVENT_TPC11_BMON_SPMU:
9162 	case GAUDI2_EVENT_TPC12_BMON_SPMU:
9163 	case GAUDI2_EVENT_TPC13_BMON_SPMU:
9164 	case GAUDI2_EVENT_TPC14_BMON_SPMU:
9165 	case GAUDI2_EVENT_TPC15_BMON_SPMU:
9166 	case GAUDI2_EVENT_TPC16_BMON_SPMU:
9167 	case GAUDI2_EVENT_TPC17_BMON_SPMU:
9168 	case GAUDI2_EVENT_TPC18_BMON_SPMU:
9169 	case GAUDI2_EVENT_TPC19_BMON_SPMU:
9170 	case GAUDI2_EVENT_TPC20_BMON_SPMU:
9171 	case GAUDI2_EVENT_TPC21_BMON_SPMU:
9172 	case GAUDI2_EVENT_TPC22_BMON_SPMU:
9173 	case GAUDI2_EVENT_TPC23_BMON_SPMU:
9174 	case GAUDI2_EVENT_TPC24_BMON_SPMU:
9175 	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9176 	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9177 	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9178 	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9179 	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9180 	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9181 	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9182 	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9183 	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9184 	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9185 	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9186 	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9187 	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9188 		fallthrough;
9189 	case GAUDI2_EVENT_DEC0_BMON_SPMU:
9190 	case GAUDI2_EVENT_DEC1_BMON_SPMU:
9191 	case GAUDI2_EVENT_DEC2_BMON_SPMU:
9192 	case GAUDI2_EVENT_DEC3_BMON_SPMU:
9193 	case GAUDI2_EVENT_DEC4_BMON_SPMU:
9194 	case GAUDI2_EVENT_DEC5_BMON_SPMU:
9195 	case GAUDI2_EVENT_DEC6_BMON_SPMU:
9196 	case GAUDI2_EVENT_DEC7_BMON_SPMU:
9197 	case GAUDI2_EVENT_DEC8_BMON_SPMU:
9198 	case GAUDI2_EVENT_DEC9_BMON_SPMU:
9199 	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9200 		error_count = GAUDI2_NA_EVENT_CAUSE;
9201 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9202 		break;
9203 
9204 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9205 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9206 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9207 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9208 		gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
9209 		error_count = GAUDI2_NA_EVENT_CAUSE;
9210 		break;
9211 
9212 	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9213 		gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
9214 		error_count = GAUDI2_NA_EVENT_CAUSE;
9215 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9216 		break;
9217 
9218 	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9219 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9220 		error_count = GAUDI2_NA_EVENT_CAUSE;
9221 		/* Do nothing- FW will handle it */
9222 		break;
9223 
9224 	case GAUDI2_EVENT_PCIE_P2P_MSIX:
9225 		error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
9226 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9227 		break;
9228 
9229 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9230 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9231 		error_count = gaudi2_handle_sm_err(hdev, event_type, index);
9232 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9233 		break;
9234 
9235 	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9236 		error_count = GAUDI2_NA_EVENT_CAUSE;
9237 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9238 		break;
9239 
9240 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9241 		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9242 						le64_to_cpu(eq_entry->data[0]));
9243 		error_count = GAUDI2_NA_EVENT_CAUSE;
9244 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9245 		break;
9246 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9247 		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9248 						le64_to_cpu(eq_entry->data[0]));
9249 		error_count = GAUDI2_NA_EVENT_CAUSE;
9250 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9251 		break;
9252 
9253 	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9254 		gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
9255 		error_count = GAUDI2_NA_EVENT_CAUSE;
9256 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9257 		break;
9258 
9259 	case GAUDI2_EVENT_ARC_DCCM_FULL:
9260 		error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
9261 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9262 		break;
9263 
9264 	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9265 	case GAUDI2_EVENT_DEV_RESET_REQ:
9266 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9267 		error_count = GAUDI2_NA_EVENT_CAUSE;
9268 		is_critical = true;
9269 		break;
9270 
9271 	default:
9272 		if (gaudi2_irq_map_table[event_type].valid) {
9273 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
9274 						event_type);
9275 			error_count = GAUDI2_NA_EVENT_CAUSE;
9276 		}
9277 	}
9278 
9279 	/* Make sure to dump an error in case no error cause was printed so far.
9280 	 * Note that although we have counted the errors, we use this number as
9281 	 * a boolean.
9282 	 */
9283 	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
9284 		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
9285 	else if (error_count == 0)
9286 		gaudi2_print_event(hdev, event_type, true,
9287 				"No error cause for H/W event %u\n", event_type);
9288 
9289 	if ((gaudi2_irq_map_table[event_type].reset || reset_required) &&
9290 				(hdev->hard_reset_on_fw_events ||
9291 				(hdev->asic_prop.fw_security_enabled && is_critical)))
9292 		goto reset_device;
9293 
9294 	/* Send unmask irq only for interrupts not classified as MSG */
9295 	if (!gaudi2_irq_map_table[event_type].msg)
9296 		hl_fw_unmask_irq(hdev, event_type);
9297 
9298 	if (event_mask)
9299 		hl_notifier_event_send_all(hdev, event_mask);
9300 
9301 	return;
9302 
9303 reset_device:
9304 	if (hdev->asic_prop.fw_security_enabled && is_critical) {
9305 		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
9306 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
9307 	} else {
9308 		reset_flags |= HL_DRV_RESET_DELAY;
9309 	}
9310 	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
9311 	hl_device_cond_reset(hdev, reset_flags, event_mask);
9312 }
9313 
9314 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
9315 			struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
9316 			u32 hw_queue_id, u32 size, u64 addr, u32 val)
9317 {
9318 	u32 ctl, pkt_size;
9319 	int rc = 0;
9320 
9321 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
9322 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
9323 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
9324 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
9325 
9326 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
9327 	lin_dma_pkt->src_addr = cpu_to_le64(val);
9328 	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
9329 	lin_dma_pkt->tsize = cpu_to_le32(size);
9330 
9331 	pkt_size = sizeof(struct packet_lin_dma);
9332 
9333 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
9334 	if (rc)
9335 		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
9336 				hw_queue_id);
9337 
9338 	return rc;
9339 }
9340 
9341 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
9342 {
9343 	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
9344 					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
9345 					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
9346 					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
9347 	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
9348 		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
9349 	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
9350 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9351 	void *lin_dma_pkts_arr;
9352 	dma_addr_t pkt_dma_addr;
9353 	int rc = 0, dma_num = 0;
9354 
9355 	if (prop->edma_enabled_mask == 0) {
9356 		dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n");
9357 		return -EIO;
9358 	}
9359 
9360 	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9361 	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
9362 	comp_addr = CFG_BASE + sob_addr;
9363 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
9364 		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
9365 	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
9366 		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
9367 
9368 	/* Calculate how many lin dma pkts we'll need */
9369 	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
9370 	pkt_size = sizeof(struct packet_lin_dma);
9371 
9372 	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
9373 					&pkt_dma_addr, GFP_KERNEL);
9374 	if (!lin_dma_pkts_arr)
9375 		return -ENOMEM;
9376 
9377 	/*
9378 	 * set mmu bypass for the scrubbing - all ddmas are configured the same so save
9379 	 * only the first one to restore later
9380 	 * also set the sob addr for all edma cores for completion.
9381 	 * set QM as trusted to allow it to access physical address with MMU bp.
9382 	 */
9383 	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
9384 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9385 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9386 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9387 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9388 
9389 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9390 				continue;
9391 
9392 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
9393 					edma_offset, mmubp);
9394 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
9395 					lower_32_bits(comp_addr));
9396 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
9397 					upper_32_bits(comp_addr));
9398 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
9399 					comp_val);
9400 			gaudi2_qman_set_test_mode(hdev,
9401 					edma_queues_id[dcore] + 4 * edma_idx, true);
9402 		}
9403 	}
9404 
9405 	WREG32(sob_addr, 0);
9406 
9407 	while (cur_addr < end_addr) {
9408 		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9409 			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9410 				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9411 
9412 				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9413 					continue;
9414 
9415 				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
9416 
9417 				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
9418 					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
9419 					pkt_dma_addr + dma_num * pkt_size,
9420 					edma_queues_id[dcore] + edma_idx * 4,
9421 					chunk_size, cur_addr, val);
9422 				if (rc)
9423 					goto end;
9424 
9425 				dma_num++;
9426 				cur_addr += chunk_size;
9427 				if (cur_addr == end_addr)
9428 					break;
9429 			}
9430 		}
9431 	}
9432 
9433 	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
9434 	if (rc) {
9435 		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
9436 		goto end;
9437 	}
9438 end:
9439 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9440 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9441 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9442 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9443 
9444 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9445 				continue;
9446 
9447 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
9448 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
9449 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
9450 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
9451 			gaudi2_qman_set_test_mode(hdev,
9452 					edma_queues_id[dcore] + 4 * edma_idx, false);
9453 		}
9454 	}
9455 
9456 	WREG32(sob_addr, 0);
9457 	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
9458 
9459 	return rc;
9460 }
9461 
9462 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
9463 {
9464 	int rc;
9465 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9466 	u64 size = prop->dram_end_address - prop->dram_user_base_address;
9467 
9468 	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
9469 
9470 	if (rc)
9471 		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
9472 				prop->dram_user_base_address, size);
9473 	return rc;
9474 }
9475 
9476 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
9477 {
9478 	int rc;
9479 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9480 	u64 val = hdev->memory_scrub_val;
9481 	u64 addr, size;
9482 
9483 	if (!hdev->memory_scrub)
9484 		return 0;
9485 
9486 	/* scrub SRAM */
9487 	addr = prop->sram_user_base_address;
9488 	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
9489 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
9490 			addr, addr + size, val);
9491 	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
9492 	if (rc) {
9493 		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
9494 		return rc;
9495 	}
9496 
9497 	/* scrub DRAM */
9498 	rc = gaudi2_scrub_device_dram(hdev, val);
9499 	if (rc) {
9500 		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
9501 		return rc;
9502 	}
9503 	return 0;
9504 }
9505 
9506 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
9507 {
9508 	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
9509 		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
9510 	u32 val, size, offset;
9511 	int dcore_id;
9512 
9513 	offset = hdev->asic_prop.first_available_cq[0] * 4;
9514 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
9515 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
9516 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
9517 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
9518 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
9519 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
9520 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
9521 			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
9522 
9523 	/* memset dcore0 CQ registers */
9524 	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9525 	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9526 	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9527 	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9528 	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9529 	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9530 
9531 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
9532 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
9533 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
9534 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
9535 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
9536 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
9537 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
9538 
9539 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9540 		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
9541 		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
9542 		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
9543 		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
9544 		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
9545 		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
9546 
9547 		cq_lbw_l_addr += DCORE_OFFSET;
9548 		cq_lbw_h_addr += DCORE_OFFSET;
9549 		cq_lbw_data_addr += DCORE_OFFSET;
9550 		cq_base_l_addr += DCORE_OFFSET;
9551 		cq_base_h_addr += DCORE_OFFSET;
9552 		cq_size_addr += DCORE_OFFSET;
9553 	}
9554 
9555 	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
9556 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
9557 	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
9558 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
9559 
9560 	/* memset dcore0 monitors */
9561 	gaudi2_memset_device_lbw(hdev, addr, size, val);
9562 
9563 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
9564 	gaudi2_memset_device_lbw(hdev, addr, size, 0);
9565 
9566 	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
9567 	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
9568 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
9569 
9570 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9571 		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
9572 		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
9573 		mon_sts_addr += DCORE_OFFSET;
9574 		mon_cfg_addr += DCORE_OFFSET;
9575 	}
9576 
9577 	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9578 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
9579 	val = 0;
9580 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
9581 			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9582 
9583 	/* memset dcore0 sobs */
9584 	gaudi2_memset_device_lbw(hdev, addr, size, val);
9585 
9586 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
9587 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
9588 
9589 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
9590 		gaudi2_memset_device_lbw(hdev, addr, size, val);
9591 		addr += DCORE_OFFSET;
9592 	}
9593 
9594 	/* Flush all WREG to prevent race */
9595 	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
9596 }
9597 
9598 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
9599 {
9600 	u32 reg_base, hw_queue_id;
9601 
9602 	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
9603 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9604 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9605 			continue;
9606 
9607 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9608 
9609 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9610 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9611 	}
9612 
9613 	/* Flush all WREG to prevent race */
9614 	RREG32(mmPDMA0_QM_ARB_CFG_0);
9615 }
9616 
9617 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
9618 {
9619 	u32 reg_base, hw_queue_id;
9620 
9621 	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
9622 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
9623 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
9624 			continue;
9625 
9626 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
9627 
9628 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
9629 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
9630 	}
9631 
9632 	/* Flush all WREG to prevent race */
9633 	RREG32(mmPDMA0_QM_ARB_CFG_0);
9634 }
9635 
9636 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
9637 {
9638 	return 0;
9639 }
9640 
9641 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
9642 {
9643 }
9644 
9645 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
9646 						struct dup_block_ctx *cfg_ctx)
9647 {
9648 	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
9649 	u8 seq;
9650 	int i;
9651 
9652 	for (i = 0 ; i < cfg_ctx->instances ; i++) {
9653 		seq = block_idx * cfg_ctx->instances + i;
9654 
9655 		/* skip disabled instance */
9656 		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
9657 			continue;
9658 
9659 		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
9660 					cfg_ctx->data);
9661 	}
9662 }
9663 
9664 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
9665 						u64 mask)
9666 {
9667 	int i;
9668 
9669 	cfg_ctx->enabled_mask = mask;
9670 
9671 	for (i = 0 ; i < cfg_ctx->blocks ; i++)
9672 		gaudi2_init_block_instances(hdev, i, cfg_ctx);
9673 }
9674 
9675 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
9676 {
9677 	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
9678 }
9679 
9680 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
9681 {
9682 	void *host_mem_virtual_addr;
9683 	dma_addr_t host_mem_dma_addr;
9684 	u64 reserved_va_base;
9685 	u32 pos, size_left, size_to_dma;
9686 	struct hl_ctx *ctx;
9687 	int rc = 0;
9688 
9689 	/* Fetch the ctx */
9690 	ctx = hl_get_compute_ctx(hdev);
9691 	if (!ctx) {
9692 		dev_err(hdev->dev, "No ctx available\n");
9693 		return -EINVAL;
9694 	}
9695 
9696 	/* Allocate buffers for read and for poll */
9697 	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
9698 								GFP_KERNEL | __GFP_ZERO);
9699 	if (host_mem_virtual_addr == NULL) {
9700 		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
9701 		rc = -ENOMEM;
9702 		goto put_ctx;
9703 	}
9704 
9705 	/* Reserve VM region on asic side */
9706 	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
9707 						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9708 	if (!reserved_va_base) {
9709 		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
9710 		rc = -ENOMEM;
9711 		goto free_data_buffer;
9712 	}
9713 
9714 	/* Create mapping on asic side */
9715 	mutex_lock(&hdev->mmu_lock);
9716 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
9717 	hl_mmu_invalidate_cache_range(hdev, false,
9718 				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
9719 				      ctx->asid, reserved_va_base, SZ_2M);
9720 	mutex_unlock(&hdev->mmu_lock);
9721 	if (rc) {
9722 		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
9723 		goto unreserve_va;
9724 	}
9725 
9726 	/* Enable MMU on KDMA */
9727 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
9728 
9729 	pos = 0;
9730 	size_left = size;
9731 	size_to_dma = SZ_2M;
9732 
9733 	while (size_left > 0) {
9734 		if (size_left < SZ_2M)
9735 			size_to_dma = size_left;
9736 
9737 		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
9738 		if (rc)
9739 			break;
9740 
9741 		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
9742 
9743 		if (size_left <= SZ_2M)
9744 			break;
9745 
9746 		pos += SZ_2M;
9747 		addr += SZ_2M;
9748 		size_left -= SZ_2M;
9749 	}
9750 
9751 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
9752 
9753 	mutex_lock(&hdev->mmu_lock);
9754 	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
9755 	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
9756 				      ctx->asid, reserved_va_base, SZ_2M);
9757 	mutex_unlock(&hdev->mmu_lock);
9758 unreserve_va:
9759 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
9760 free_data_buffer:
9761 	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
9762 put_ctx:
9763 	hl_ctx_put(ctx);
9764 
9765 	return rc;
9766 }
9767 
9768 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
9769 {
9770 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9771 	int min_alloc_order, rc;
9772 
9773 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9774 		return 0;
9775 
9776 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
9777 								HOST_SPACE_INTERNAL_CB_SZ,
9778 								&hdev->internal_cb_pool_dma_addr,
9779 								GFP_KERNEL | __GFP_ZERO);
9780 
9781 	if (!hdev->internal_cb_pool_virt_addr)
9782 		return -ENOMEM;
9783 
9784 	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
9785 					gaudi2_get_wait_cb_size(hdev)));
9786 
9787 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
9788 	if (!hdev->internal_cb_pool) {
9789 		dev_err(hdev->dev, "Failed to create internal CB pool\n");
9790 		rc = -ENOMEM;
9791 		goto free_internal_cb_pool;
9792 	}
9793 
9794 	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
9795 				HOST_SPACE_INTERNAL_CB_SZ, -1);
9796 	if (rc) {
9797 		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
9798 		rc = -EFAULT;
9799 		goto destroy_internal_cb_pool;
9800 	}
9801 
9802 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
9803 					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
9804 
9805 	if (!hdev->internal_cb_va_base) {
9806 		rc = -ENOMEM;
9807 		goto destroy_internal_cb_pool;
9808 	}
9809 
9810 	mutex_lock(&hdev->mmu_lock);
9811 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
9812 					HOST_SPACE_INTERNAL_CB_SZ);
9813 	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
9814 	mutex_unlock(&hdev->mmu_lock);
9815 
9816 	if (rc)
9817 		goto unreserve_internal_cb_pool;
9818 
9819 	return 0;
9820 
9821 unreserve_internal_cb_pool:
9822 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9823 destroy_internal_cb_pool:
9824 	gen_pool_destroy(hdev->internal_cb_pool);
9825 free_internal_cb_pool:
9826 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9827 					hdev->internal_cb_pool_dma_addr);
9828 
9829 	return rc;
9830 }
9831 
9832 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
9833 {
9834 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9835 
9836 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
9837 		return;
9838 
9839 	mutex_lock(&hdev->mmu_lock);
9840 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9841 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
9842 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
9843 	mutex_unlock(&hdev->mmu_lock);
9844 
9845 	gen_pool_destroy(hdev->internal_cb_pool);
9846 
9847 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
9848 					hdev->internal_cb_pool_dma_addr);
9849 }
9850 
9851 static void gaudi2_restore_user_registers(struct hl_device *hdev)
9852 {
9853 	gaudi2_restore_user_sm_registers(hdev);
9854 	gaudi2_restore_user_qm_registers(hdev);
9855 }
9856 
9857 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9858 {
9859 	struct hl_device *hdev = ctx->hdev;
9860 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9861 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9862 	int rc;
9863 
9864 	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9865 				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
9866 	if (rc)
9867 		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
9868 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9869 
9870 	return rc;
9871 }
9872 
9873 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
9874 {
9875 	struct hl_device *hdev = ctx->hdev;
9876 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9877 	int rc;
9878 
9879 	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
9880 				prop->pmmu.page_size, true);
9881 	if (rc)
9882 		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
9883 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
9884 }
9885 
9886 static int gaudi2_ctx_init(struct hl_ctx *ctx)
9887 {
9888 	int rc;
9889 
9890 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
9891 	if (rc)
9892 		return rc;
9893 
9894 	/* No need to clear user registers if the device has just
9895 	 * performed reset, we restore only nic qm registers
9896 	 */
9897 	if (ctx->hdev->reset_upon_device_release)
9898 		gaudi2_restore_nic_qm_registers(ctx->hdev);
9899 	else
9900 		gaudi2_restore_user_registers(ctx->hdev);
9901 
9902 	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
9903 	if (rc)
9904 		return rc;
9905 
9906 	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
9907 	if (rc)
9908 		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9909 
9910 	return rc;
9911 }
9912 
9913 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
9914 {
9915 	if (ctx->asid == HL_KERNEL_ASID_ID)
9916 		return;
9917 
9918 	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
9919 
9920 	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
9921 }
9922 
9923 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
9924 {
9925 	struct hl_device *hdev = cs->ctx->hdev;
9926 	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
9927 	u32 mon_payload, sob_id, mon_id;
9928 
9929 	if (!cs_needs_completion(cs))
9930 		return 0;
9931 
9932 	/*
9933 	 * First 64 SOB/MON are reserved for driver for QMAN auto completion
9934 	 * mechanism. Each SOB/MON pair are used for a pending CS with the same
9935 	 * cyclic index. The SOB value is increased when each of the CS jobs is
9936 	 * completed. When the SOB reaches the number of CS jobs, the monitor
9937 	 * generates MSI-X interrupt.
9938 	 */
9939 
9940 	sob_id = mon_id = index;
9941 	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
9942 				(1 << CQ_ENTRY_READY_SHIFT) | index;
9943 
9944 	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
9945 				cs->jobs_cnt);
9946 
9947 	return 0;
9948 }
9949 
9950 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
9951 {
9952 	return HL_INVALID_QUEUE;
9953 }
9954 
9955 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
9956 {
9957 	struct hl_cb *cb = data;
9958 	struct packet_msg_short *pkt;
9959 	u32 value, ctl, pkt_size = sizeof(*pkt);
9960 
9961 	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
9962 	memset(pkt, 0, pkt_size);
9963 
9964 	/* Inc by 1, Mode ADD */
9965 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
9966 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
9967 
9968 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
9969 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
9970 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9971 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
9972 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
9973 
9974 	pkt->value = cpu_to_le32(value);
9975 	pkt->ctl = cpu_to_le32(ctl);
9976 
9977 	return size + pkt_size;
9978 }
9979 
9980 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
9981 {
9982 	u32 ctl, pkt_size = sizeof(*pkt);
9983 
9984 	memset(pkt, 0, pkt_size);
9985 
9986 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
9987 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
9988 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
9989 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
9990 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
9991 
9992 	pkt->value = cpu_to_le32(value);
9993 	pkt->ctl = cpu_to_le32(ctl);
9994 
9995 	return pkt_size;
9996 }
9997 
9998 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
9999 					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
10000 {
10001 	u32 ctl, value, pkt_size = sizeof(*pkt);
10002 	u8 mask;
10003 
10004 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
10005 		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
10006 		return 0;
10007 	}
10008 
10009 	memset(pkt, 0, pkt_size);
10010 
10011 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
10012 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
10013 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL*/
10014 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
10015 
10016 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10017 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10018 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10019 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10020 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10021 
10022 	pkt->value = cpu_to_le32(value);
10023 	pkt->ctl = cpu_to_le32(ctl);
10024 
10025 	return pkt_size;
10026 }
10027 
10028 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
10029 {
10030 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
10031 
10032 	memset(pkt, 0, pkt_size);
10033 
10034 	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
10035 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
10036 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
10037 
10038 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
10039 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10040 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10041 
10042 	pkt->cfg = cpu_to_le32(cfg);
10043 	pkt->ctl = cpu_to_le32(ctl);
10044 
10045 	return pkt_size;
10046 }
10047 
10048 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
10049 {
10050 	struct hl_cb *cb = prop->data;
10051 	void *buf = (void *) (uintptr_t) (cb->kernel_address);
10052 
10053 	u64 monitor_base, fence_addr = 0;
10054 	u32 stream_index, size = prop->size;
10055 	u16 msg_addr_offset;
10056 
10057 	stream_index = prop->q_idx % 4;
10058 	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
10059 			QM_FENCE2_OFFSET + stream_index * 4;
10060 
10061 	/*
10062 	 * monitor_base should be the content of the base0 address registers,
10063 	 * so it will be added to the msg short offsets
10064 	 */
10065 	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
10066 
10067 	/* First monitor config packet: low address of the sync */
10068 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
10069 				monitor_base;
10070 
10071 	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
10072 
10073 	/* Second monitor config packet: high address of the sync */
10074 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
10075 				monitor_base;
10076 
10077 	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
10078 
10079 	/*
10080 	 * Third monitor config packet: the payload, i.e. what to write when the
10081 	 * sync triggers
10082 	 */
10083 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
10084 				monitor_base;
10085 
10086 	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
10087 
10088 	/* Fourth monitor config packet: bind the monitor to a sync object */
10089 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
10090 
10091 	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
10092 						prop->sob_val, msg_addr_offset);
10093 
10094 	/* Fence packet */
10095 	size += gaudi2_add_fence_pkt(buf + size);
10096 
10097 	return size;
10098 }
10099 
10100 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
10101 {
10102 	struct hl_hw_sob *hw_sob = data;
10103 
10104 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
10105 
10106 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
10107 
10108 	kref_init(&hw_sob->kref);
10109 }
10110 
10111 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
10112 {
10113 }
10114 
10115 static u64 gaudi2_get_device_time(struct hl_device *hdev)
10116 {
10117 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
10118 
10119 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
10120 }
10121 
10122 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
10123 {
10124 	return 0;
10125 }
10126 
10127 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
10128 					struct hl_cs *cs, u32 wait_queue_id,
10129 					u32 collective_engine_id, u32 encaps_signal_offset)
10130 {
10131 	return -EINVAL;
10132 }
10133 
10134 /*
10135  * hl_mmu_scramble - converts a dram (non power of 2) page-size aligned address
10136  *                   to DMMU page-size address (64MB) before mapping it in
10137  *                   the MMU.
10138  * The operation is performed on both the virtual and physical addresses.
10139  * for device with 6 HBMs the scramble is:
10140  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
10141  *
10142  * Example:
10143  * =============================================================================
10144  * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
10145  * Phys address                                                     in MMU last
10146  *                                                                    HOP
10147  * =============================================================================
10148  * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
10149  * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
10150  * =============================================================================
10151  */
10152 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
10153 {
10154 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10155 	u32 divisor, mod_va;
10156 	u64 div_va;
10157 
10158 	/* accept any address in the DRAM address space */
10159 	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
10160 									VA_HBM_SPACE_END)) {
10161 
10162 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10163 		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
10164 		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
10165 			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
10166 			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
10167 	}
10168 
10169 	return raw_addr;
10170 }
10171 
10172 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
10173 {
10174 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10175 	u32 divisor, mod_va;
10176 	u64 div_va;
10177 
10178 	/* accept any address in the DRAM address space */
10179 	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
10180 									VA_HBM_SPACE_END)) {
10181 
10182 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10183 		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
10184 					PAGE_SIZE_64MB, &mod_va);
10185 
10186 		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
10187 					(div_va * divisor + mod_va));
10188 	}
10189 
10190 	return scrambled_addr;
10191 }
10192 
10193 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
10194 {
10195 	u32 base = 0, dcore_id, dec_id;
10196 
10197 	if (core_id >= NUMBER_OF_DEC) {
10198 		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
10199 		goto out;
10200 	}
10201 
10202 	if (core_id < 8) {
10203 		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
10204 		dec_id = core_id % NUM_OF_DEC_PER_DCORE;
10205 
10206 		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
10207 				dec_id * DCORE_VDEC_OFFSET;
10208 	} else {
10209 		/* PCIe Shared Decoder */
10210 		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
10211 	}
10212 out:
10213 	return base;
10214 }
10215 
10216 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
10217 				u32 *block_size, u32 *block_id)
10218 {
10219 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10220 	int i;
10221 
10222 	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
10223 		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
10224 			*block_id = i;
10225 			if (block_size)
10226 				*block_size = gaudi2->mapped_blocks[i].size;
10227 			return 0;
10228 		}
10229 	}
10230 
10231 	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
10232 
10233 	return -EINVAL;
10234 }
10235 
10236 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
10237 			u32 block_id, u32 block_size)
10238 {
10239 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10240 	u64 offset_in_bar;
10241 	u64 address;
10242 	int rc;
10243 
10244 	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
10245 		dev_err(hdev->dev, "Invalid block id %u", block_id);
10246 		return -EINVAL;
10247 	}
10248 
10249 	/* we allow mapping only an entire block */
10250 	if (block_size != gaudi2->mapped_blocks[block_id].size) {
10251 		dev_err(hdev->dev, "Invalid block size %u", block_size);
10252 		return -EINVAL;
10253 	}
10254 
10255 	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
10256 
10257 	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
10258 
10259 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
10260 			VM_DONTCOPY | VM_NORESERVE;
10261 
10262 	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
10263 			block_size, vma->vm_page_prot);
10264 	if (rc)
10265 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
10266 
10267 	return rc;
10268 }
10269 
10270 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
10271 {
10272 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10273 
10274 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
10275 	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
10276 
10277 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
10278 		WREG32(irq_handler_offset,
10279 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
10280 }
10281 
10282 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
10283 {
10284 	switch (mmu_id) {
10285 	case HW_CAP_DCORE0_DMMU0:
10286 		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
10287 		break;
10288 	case HW_CAP_DCORE0_DMMU1:
10289 		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
10290 		break;
10291 	case HW_CAP_DCORE0_DMMU2:
10292 		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
10293 		break;
10294 	case HW_CAP_DCORE0_DMMU3:
10295 		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
10296 		break;
10297 	case HW_CAP_DCORE1_DMMU0:
10298 		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
10299 		break;
10300 	case HW_CAP_DCORE1_DMMU1:
10301 		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
10302 		break;
10303 	case HW_CAP_DCORE1_DMMU2:
10304 		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
10305 		break;
10306 	case HW_CAP_DCORE1_DMMU3:
10307 		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
10308 		break;
10309 	case HW_CAP_DCORE2_DMMU0:
10310 		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
10311 		break;
10312 	case HW_CAP_DCORE2_DMMU1:
10313 		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
10314 		break;
10315 	case HW_CAP_DCORE2_DMMU2:
10316 		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
10317 		break;
10318 	case HW_CAP_DCORE2_DMMU3:
10319 		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
10320 		break;
10321 	case HW_CAP_DCORE3_DMMU0:
10322 		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
10323 		break;
10324 	case HW_CAP_DCORE3_DMMU1:
10325 		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
10326 		break;
10327 	case HW_CAP_DCORE3_DMMU2:
10328 		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
10329 		break;
10330 	case HW_CAP_DCORE3_DMMU3:
10331 		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
10332 		break;
10333 	case HW_CAP_PMMU:
10334 		*mmu_base = mmPMMU_HBW_MMU_BASE;
10335 		break;
10336 	default:
10337 		return -EINVAL;
10338 	}
10339 
10340 	return 0;
10341 }
10342 
10343 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
10344 {
10345 	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
10346 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10347 	u32 mmu_base;
10348 
10349 	if (!(gaudi2->hw_cap_initialized & mmu_id))
10350 		return;
10351 
10352 	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
10353 		return;
10354 
10355 	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
10356 	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
10357 }
10358 
10359 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
10360 {
10361 	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
10362 
10363 	/* check all HMMUs */
10364 	for (i = 0 ; i < num_of_hmmus ; i++) {
10365 		mmu_id = HW_CAP_DCORE0_DMMU0 << i;
10366 
10367 		if (mmu_cap_mask & mmu_id)
10368 			gaudi2_ack_mmu_error(hdev, mmu_id);
10369 	}
10370 
10371 	/* check PMMU */
10372 	if (mmu_cap_mask & HW_CAP_PMMU)
10373 		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
10374 
10375 	return 0;
10376 }
10377 
10378 static void gaudi2_get_msi_info(__le32 *table)
10379 {
10380 	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
10381 }
10382 
10383 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
10384 {
10385 	switch (pll_idx) {
10386 	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
10387 	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
10388 	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
10389 	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
10390 	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
10391 	case HL_GAUDI2_MME_PLL: return MME_PLL;
10392 	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
10393 	case HL_GAUDI2_IF_PLL: return IF_PLL;
10394 	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
10395 	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
10396 	case HL_GAUDI2_VID_PLL: return VID_PLL;
10397 	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
10398 	default: return -EINVAL;
10399 	}
10400 }
10401 
10402 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
10403 {
10404 	/* Not implemented */
10405 	return 0;
10406 }
10407 
10408 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
10409 {
10410 	/* Not implemented */
10411 	return 0;
10412 }
10413 
10414 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
10415 				struct hl_device *hdev, struct hl_mon_state_dump *mon)
10416 {
10417 	/* Not implemented */
10418 	return 0;
10419 }
10420 
10421 
10422 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
10423 				u64 status_base_offset, enum hl_sync_engine_type engine_type,
10424 				u32 engine_id, char **buf, size_t *size, size_t *offset)
10425 {
10426 	/* Not implemented */
10427 	return 0;
10428 }
10429 
10430 
10431 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
10432 	.monitor_valid = gaudi2_monitor_valid,
10433 	.print_single_monitor = gaudi2_print_single_monitor,
10434 	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
10435 	.print_fences_single_engine = gaudi2_print_fences_single_engine,
10436 };
10437 
10438 static void gaudi2_state_dump_init(struct hl_device *hdev)
10439 {
10440 	/* Not implemented */
10441 	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
10442 	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
10443 }
10444 
10445 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
10446 {
10447 	return 0;
10448 }
10449 
10450 static u32 *gaudi2_get_stream_master_qid_arr(void)
10451 {
10452 	return NULL;
10453 }
10454 
10455 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
10456 				struct attribute_group *dev_vrm_attr_grp)
10457 {
10458 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
10459 	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
10460 }
10461 
10462 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
10463 					u32 page_size, u32 *real_page_size, bool is_dram_addr)
10464 {
10465 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10466 
10467 	/* for host pages the page size must be  */
10468 	if (!is_dram_addr) {
10469 		if (page_size % mmu_prop->page_size)
10470 			goto page_size_err;
10471 
10472 		*real_page_size = mmu_prop->page_size;
10473 		return 0;
10474 	}
10475 
10476 	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
10477 		goto page_size_err;
10478 
10479 	/*
10480 	 * MMU page size is different from DRAM page size (more precisely, DMMU page is greater
10481 	 * than DRAM page size).
10482 	 * for this reason work with the DRAM page size and let the MMU scrambling routine handle
10483 	 * this mismatch when calculating the address to place in the MMU page table.
10484 	 * (in that case also make sure that the dram_page_size is not greater than the
10485 	 * mmu page size)
10486 	 */
10487 	*real_page_size = prop->dram_page_size;
10488 
10489 	return 0;
10490 
10491 page_size_err:
10492 	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
10493 							page_size, mmu_prop->page_size >> 10);
10494 	return -EFAULT;
10495 }
10496 
10497 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
10498 {
10499 	return -EOPNOTSUPP;
10500 }
10501 
10502 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
10503 {
10504 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10505 
10506 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
10507 		return 0;
10508 
10509 	return hl_fw_send_device_activity(hdev, open);
10510 }
10511 
10512 static const struct hl_asic_funcs gaudi2_funcs = {
10513 	.early_init = gaudi2_early_init,
10514 	.early_fini = gaudi2_early_fini,
10515 	.late_init = gaudi2_late_init,
10516 	.late_fini = gaudi2_late_fini,
10517 	.sw_init = gaudi2_sw_init,
10518 	.sw_fini = gaudi2_sw_fini,
10519 	.hw_init = gaudi2_hw_init,
10520 	.hw_fini = gaudi2_hw_fini,
10521 	.halt_engines = gaudi2_halt_engines,
10522 	.suspend = gaudi2_suspend,
10523 	.resume = gaudi2_resume,
10524 	.mmap = gaudi2_mmap,
10525 	.ring_doorbell = gaudi2_ring_doorbell,
10526 	.pqe_write = gaudi2_pqe_write,
10527 	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
10528 	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
10529 	.scrub_device_mem = gaudi2_scrub_device_mem,
10530 	.scrub_device_dram = gaudi2_scrub_device_dram,
10531 	.get_int_queue_base = NULL,
10532 	.test_queues = gaudi2_test_queues,
10533 	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
10534 	.asic_dma_pool_free = gaudi2_dma_pool_free,
10535 	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
10536 	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
10537 	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
10538 	.asic_dma_map_single = gaudi2_dma_map_single,
10539 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
10540 	.cs_parser = gaudi2_cs_parser,
10541 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
10542 	.add_end_of_cb_packets = NULL,
10543 	.update_eq_ci = gaudi2_update_eq_ci,
10544 	.context_switch = gaudi2_context_switch,
10545 	.restore_phase_topology = gaudi2_restore_phase_topology,
10546 	.debugfs_read_dma = gaudi2_debugfs_read_dma,
10547 	.add_device_attr = gaudi2_add_device_attr,
10548 	.handle_eqe = gaudi2_handle_eqe,
10549 	.get_events_stat = gaudi2_get_events_stat,
10550 	.read_pte = NULL,
10551 	.write_pte = NULL,
10552 	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
10553 	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
10554 	.mmu_prefetch_cache_range = NULL,
10555 	.send_heartbeat = gaudi2_send_heartbeat,
10556 	.debug_coresight = gaudi2_debug_coresight,
10557 	.is_device_idle = gaudi2_is_device_idle,
10558 	.compute_reset_late_init = gaudi2_compute_reset_late_init,
10559 	.hw_queues_lock = gaudi2_hw_queues_lock,
10560 	.hw_queues_unlock = gaudi2_hw_queues_unlock,
10561 	.get_pci_id = gaudi2_get_pci_id,
10562 	.get_eeprom_data = gaudi2_get_eeprom_data,
10563 	.get_monitor_dump = gaudi2_get_monitor_dump,
10564 	.send_cpu_message = gaudi2_send_cpu_message,
10565 	.pci_bars_map = gaudi2_pci_bars_map,
10566 	.init_iatu = gaudi2_init_iatu,
10567 	.rreg = hl_rreg,
10568 	.wreg = hl_wreg,
10569 	.halt_coresight = gaudi2_halt_coresight,
10570 	.ctx_init = gaudi2_ctx_init,
10571 	.ctx_fini = gaudi2_ctx_fini,
10572 	.pre_schedule_cs = gaudi2_pre_schedule_cs,
10573 	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
10574 	.load_firmware_to_device = NULL,
10575 	.load_boot_fit_to_device = NULL,
10576 	.get_signal_cb_size = gaudi2_get_signal_cb_size,
10577 	.get_wait_cb_size = gaudi2_get_wait_cb_size,
10578 	.gen_signal_cb = gaudi2_gen_signal_cb,
10579 	.gen_wait_cb = gaudi2_gen_wait_cb,
10580 	.reset_sob = gaudi2_reset_sob,
10581 	.reset_sob_group = gaudi2_reset_sob_group,
10582 	.get_device_time = gaudi2_get_device_time,
10583 	.pb_print_security_errors = gaudi2_pb_print_security_errors,
10584 	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
10585 	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
10586 	.get_dec_base_addr = gaudi2_get_dec_base_addr,
10587 	.scramble_addr = gaudi2_mmu_scramble_addr,
10588 	.descramble_addr = gaudi2_mmu_descramble_addr,
10589 	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
10590 	.get_hw_block_id = gaudi2_get_hw_block_id,
10591 	.hw_block_mmap = gaudi2_block_mmap,
10592 	.enable_events_from_fw = gaudi2_enable_events_from_fw,
10593 	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
10594 	.get_msi_info = gaudi2_get_msi_info,
10595 	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
10596 	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
10597 	.init_firmware_loader = gaudi2_init_firmware_loader,
10598 	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
10599 	.state_dump_init = gaudi2_state_dump_init,
10600 	.get_sob_addr = &gaudi2_get_sob_addr,
10601 	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
10602 	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
10603 	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
10604 	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
10605 	.access_dev_mem = hl_access_dev_mem,
10606 	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
10607 	.set_engine_cores = gaudi2_set_engine_cores,
10608 	.send_device_activity = gaudi2_send_device_activity,
10609 	.set_dram_properties = gaudi2_set_dram_properties,
10610 	.set_binning_masks = gaudi2_set_binning_masks,
10611 };
10612 
10613 void gaudi2_set_asic_funcs(struct hl_device *hdev)
10614 {
10615 	hdev->asic_funcs = &gaudi2_funcs;
10616 }
10617