xref: /openbmc/linux/drivers/accel/habanalabs/gaudi/gaudi.c (revision 0e73f1ba602d953ee8ceda5cea3a381bf212b80b)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMAN):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is always not
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
/* Names of the firmware image files for this ASIC */
#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

/* Reset and queue-test wait times */
#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

/*
 * Longer variants of the timeouts, named after "PLDM".
 * NOTE(review): presumably used when hdev->pldm is set - confirm at the
 * users of these macros.
 */
#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
/* 100 times the regular MMU configuration timeout */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
/* 30 times the regular device timeout */
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9

#define GAUDI_MAX_STRING_LEN		20

#define GAUDI_CB_POOL_CB_CNT		512
#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

/* Number of entries in gaudi_tpc_interrupts_cause[] */
#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

/* Number of entries in gaudi_qman_error_cause[] */
#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

/* Number of entries in gaudi_qman_arb_error_cause[] */
#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

/*
 * Size of a buffer that holds a 32-bit value printed in binary with a "0b"
 * prefix; sizeof() on the literal also counts the terminating NUL.
 */
#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256
104 #define MONITOR_SOB_STRING_SIZE		256
105 
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107 	GAUDI_QUEUE_ID_DMA_0_0,
108 	GAUDI_QUEUE_ID_DMA_0_1,
109 	GAUDI_QUEUE_ID_DMA_0_2,
110 	GAUDI_QUEUE_ID_DMA_0_3,
111 	GAUDI_QUEUE_ID_DMA_1_0,
112 	GAUDI_QUEUE_ID_DMA_1_1,
113 	GAUDI_QUEUE_ID_DMA_1_2,
114 	GAUDI_QUEUE_ID_DMA_1_3
115 };
116 
/*
 * Maps each logical DMA channel to the DMA engine ID assigned to it:
 * the two PCI DMA channels use DMA engines 0-1 and the six HBM DMA
 * channels use DMA engines 2-7.
 */
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};
127 
/*
 * Queue ID associated with each completion queue index: indices 0-3 map
 * to the four streams of DMA 0, indices 4-7 to the four streams of DMA 1.
 */
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};
138 
/*
 * Size in bytes of each packet type, indexed by packet ID. Packet IDs
 * without an entry here are left zero-initialized.
 */
static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};
155 
156 static inline bool validate_packet_id(enum packet_id id)
157 {
158 	switch (id) {
159 	case PACKET_WREG_32:
160 	case PACKET_WREG_BULK:
161 	case PACKET_MSG_LONG:
162 	case PACKET_MSG_SHORT:
163 	case PACKET_CP_DMA:
164 	case PACKET_REPEAT:
165 	case PACKET_MSG_PROT:
166 	case PACKET_FENCE:
167 	case PACKET_LIN_DMA:
168 	case PACKET_NOP:
169 	case PACKET_STOP:
170 	case PACKET_ARB_POINT:
171 	case PACKET_WAIT:
172 	case PACKET_LOAD_AND_EXE:
173 		return true;
174 	default:
175 		return false;
176 	}
177 }
178 
179 static const char * const
180 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181 	"tpc_address_exceed_slm",
182 	"tpc_div_by_0",
183 	"tpc_spu_mac_overflow",
184 	"tpc_spu_addsub_overflow",
185 	"tpc_spu_abs_overflow",
186 	"tpc_spu_fp_dst_nan_inf",
187 	"tpc_spu_fp_dst_denorm",
188 	"tpc_vpu_mac_overflow",
189 	"tpc_vpu_addsub_overflow",
190 	"tpc_vpu_abs_overflow",
191 	"tpc_vpu_fp_dst_nan_inf",
192 	"tpc_vpu_fp_dst_denorm",
193 	"tpc_assertions",
194 	"tpc_illegal_instruction",
195 	"tpc_pc_wrap_around",
196 	"tpc_qm_sw_err",
197 	"tpc_hbw_rresp_err",
198 	"tpc_hbw_bresp_err",
199 	"tpc_lbw_rresp_err",
200 	"tpc_lbw_bresp_err"
201 };
202 
203 static const char * const
204 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
205 	"PQ AXI HBW error",
206 	"CQ AXI HBW error",
207 	"CP AXI HBW error",
208 	"CP error due to undefined OPCODE",
209 	"CP encountered STOP OPCODE",
210 	"CP AXI LBW error",
211 	"CP WRREG32 or WRBULK returned error",
212 	"N/A",
213 	"FENCE 0 inc over max value and clipped",
214 	"FENCE 1 inc over max value and clipped",
215 	"FENCE 2 inc over max value and clipped",
216 	"FENCE 3 inc over max value and clipped",
217 	"FENCE 0 dec under min value and clipped",
218 	"FENCE 1 dec under min value and clipped",
219 	"FENCE 2 dec under min value and clipped",
220 	"FENCE 3 dec under min value and clipped"
221 };
222 
223 static const char * const
224 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225 	"Choice push while full error",
226 	"Choice Q watchdog error",
227 	"MSG AXI LBW returned with error"
228 };
229 
230 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
344 };
345 
/*
 * Names of the sync objects that have a fixed, well-known ID (IDs 0-26).
 * NOTE(review): presumably consumed by the state-dump code to print
 * readable names - confirm at the users of this table.
 */
static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};
375 
/*
 * Names of the monitors that have a fixed, well-known ID (IDs 200-211).
 * Note that ID 202 has no entry in this table.
 */
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};
389 
/*
 * Gaudi values of the state-dump properties, indexed by the SP_* property
 * IDs. The entries are register addresses of the first instance of a block,
 * strides/offsets computed as differences between register addresses, and
 * counts of engines, queues, streams, fences and cores.
 */
static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	/* distance between the QMAN register blocks of two adjacent TPCs */
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	/* note: the stride is taken between MME0 and MME2, not MME1 */
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	/* distance between the QMAN register blocks of two adjacent DMAs */
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	/* offsets below are relative to the QMAN's GLBL_CFG0 register */
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};
425 
/*
 * Maps every queue ID to an engine ID; the four streams of a QMAN all map
 * to the same engine.
 *
 * The CPU PQ maps to GAUDI_ENGINE_ID_SIZE rather than to a specific engine.
 * NOTE(review): presumably this marks "no engine" - confirm at the users.
 *
 * The MME_1_* queues map to GAUDI_ENGINE_ID_MME_2, not MME_1.
 */
static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};
457 
/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 *
 * The list is terminated by a NULL entry.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};
468 
/*
 * Parameters describing where to extract ECC error information from.
 * NOTE(review): the field descriptions below are inferred from the names
 * only - confirm against the code that fills and consumes this struct.
 */
struct ecc_info_extract_params {
	u64 block_address;	/* presumably the address of the block to query */
	u32 num_memories;	/* presumably the number of memories in that block */
	bool derr;		/* presumably selects double-error vs. single-error info */
};
474 
/* Forward declarations for static functions of this file */
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
								u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);
493 static inline enum hl_collective_mode
494 get_collective_mode(struct hl_device *hdev, u32 queue_id)
495 {
496 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
497 		return HL_COLLECTIVE_MASTER;
498 
499 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
500 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
501 		return HL_COLLECTIVE_SLAVE;
502 
503 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
504 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
505 		return HL_COLLECTIVE_SLAVE;
506 
507 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
508 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
509 		return HL_COLLECTIVE_SLAVE;
510 
511 	return HL_COLLECTIVE_NOT_SUPPORTED;
512 }
513 
514 static inline void set_default_power_values(struct hl_device *hdev)
515 {
516 	struct asic_fixed_properties *prop = &hdev->asic_prop;
517 
518 	if (hdev->card_type == cpucp_card_type_pmc) {
519 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
520 
521 		if (prop->fw_security_enabled)
522 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
523 		else
524 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
525 	} else {
526 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
527 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
528 	}
529 }
530 
531 static int gaudi_set_fixed_properties(struct hl_device *hdev)
532 {
533 	struct asic_fixed_properties *prop = &hdev->asic_prop;
534 	u32 num_sync_stream_queues = 0;
535 	int i;
536 
537 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
538 	prop->hw_queues_props = kcalloc(prop->max_queues,
539 			sizeof(struct hw_queue_properties),
540 			GFP_KERNEL);
541 
542 	if (!prop->hw_queues_props)
543 		return -ENOMEM;
544 
545 	for (i = 0 ; i < prop->max_queues ; i++) {
546 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
547 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
548 			prop->hw_queues_props[i].driver_only = 0;
549 			prop->hw_queues_props[i].supports_sync_stream = 1;
550 			prop->hw_queues_props[i].cb_alloc_flags =
551 				CB_ALLOC_KERNEL;
552 			num_sync_stream_queues++;
553 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
554 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
555 			prop->hw_queues_props[i].driver_only = 1;
556 			prop->hw_queues_props[i].supports_sync_stream = 0;
557 			prop->hw_queues_props[i].cb_alloc_flags =
558 				CB_ALLOC_KERNEL;
559 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
560 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
561 			prop->hw_queues_props[i].driver_only = 0;
562 			prop->hw_queues_props[i].supports_sync_stream = 0;
563 			prop->hw_queues_props[i].cb_alloc_flags =
564 				CB_ALLOC_USER;
565 
566 		}
567 		prop->hw_queues_props[i].collective_mode =
568 						get_collective_mode(hdev, i);
569 	}
570 
571 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
572 	prop->cfg_base_address = CFG_BASE;
573 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
574 	prop->host_base_address = HOST_PHYS_BASE;
575 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
576 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
577 	prop->completion_mode = HL_COMPLETION_MODE_JOB;
578 	prop->collective_first_sob = 0;
579 	prop->collective_first_mon = 0;
580 
581 	/* 2 SOBs per internal queue stream are reserved for collective */
582 	prop->sync_stream_first_sob =
583 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
584 			* QMAN_STREAMS * HL_RSVD_SOBS;
585 
586 	/* 1 monitor per internal queue stream are reserved for collective
587 	 * 2 monitors per external queue stream are reserved for collective
588 	 */
589 	prop->sync_stream_first_mon =
590 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
591 			(NUMBER_OF_EXT_HW_QUEUES * 2);
592 
593 	prop->dram_base_address = DRAM_PHYS_BASE;
594 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
595 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
596 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
597 
598 	prop->sram_base_address = SRAM_BASE_ADDR;
599 	prop->sram_size = SRAM_SIZE;
600 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
601 	prop->sram_user_base_address =
602 			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
603 
604 	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
605 	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
606 
607 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
608 	if (hdev->pldm)
609 		prop->mmu_pgt_size = 0x800000; /* 8MB */
610 	else
611 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
612 	prop->mmu_pte_size = HL_PTE_SIZE;
613 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
614 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
615 	prop->dram_page_size = PAGE_SIZE_2MB;
616 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
617 	prop->dram_supports_virtual_memory = false;
618 
619 	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
620 	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
621 	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
622 	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
623 	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
624 	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
625 	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
626 	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
627 	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
628 	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
629 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
630 	prop->pmmu.end_addr =
631 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
632 	prop->pmmu.page_size = PAGE_SIZE_4KB;
633 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
634 	prop->pmmu.last_mask = LAST_MASK;
635 	/* TODO: will be duplicated until implementing per-MMU props */
636 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
637 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
638 
639 	/* PMMU and HPMMU are the same except of page size */
640 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
641 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
642 
643 	/* shifts and masks are the same in PMMU and DMMU */
644 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
645 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
646 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
647 	prop->dmmu.page_size = PAGE_SIZE_2MB;
648 
649 	prop->cfg_size = CFG_SIZE;
650 	prop->max_asid = MAX_ASID;
651 	prop->num_of_events = GAUDI_EVENT_SIZE;
652 	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
653 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
654 
655 	set_default_power_values(hdev);
656 
657 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
658 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
659 
660 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
661 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
662 
663 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
664 					CARD_NAME_MAX_LEN);
665 
666 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
667 
668 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
669 			prop->sync_stream_first_sob +
670 			(num_sync_stream_queues * HL_RSVD_SOBS);
671 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
672 			prop->sync_stream_first_mon +
673 			(num_sync_stream_queues * HL_RSVD_MONS);
674 
675 	prop->first_available_user_interrupt = USHRT_MAX;
676 	prop->tpc_interrupt_id = USHRT_MAX;
677 
678 	/* single msi */
679 	prop->eq_interrupt_id = 0;
680 
681 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
682 		prop->first_available_cq[i] = USHRT_MAX;
683 
684 	prop->fw_cpu_boot_dev_sts0_valid = false;
685 	prop->fw_cpu_boot_dev_sts1_valid = false;
686 	prop->hard_reset_done_by_fw = false;
687 	prop->gic_interrupts_enable = true;
688 
689 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
690 
691 	prop->clk_pll_index = HL_GAUDI_MME_PLL;
692 	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
693 
694 	prop->use_get_power_for_reset_history = true;
695 
696 	prop->configurable_stop_on_err = true;
697 
698 	prop->set_max_power_on_device_init = true;
699 
700 	prop->dma_mask = 48;
701 
702 	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
703 
704 	return 0;
705 }
706 
707 static int gaudi_pci_bars_map(struct hl_device *hdev)
708 {
709 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
710 	bool is_wc[3] = {false, false, true};
711 	int rc;
712 
713 	rc = hl_pci_bars_map(hdev, name, is_wc);
714 	if (rc)
715 		return rc;
716 
717 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
718 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
719 
720 	return 0;
721 }
722 
723 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
724 {
725 	struct gaudi_device *gaudi = hdev->asic_specific;
726 	struct hl_inbound_pci_region pci_region;
727 	u64 old_addr = addr;
728 	int rc;
729 
730 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
731 		return old_addr;
732 
733 	if (hdev->asic_prop.iatu_done_by_fw)
734 		return U64_MAX;
735 
736 	/* Inbound Region 2 - Bar 4 - Point to HBM */
737 	pci_region.mode = PCI_BAR_MATCH_MODE;
738 	pci_region.bar = HBM_BAR_ID;
739 	pci_region.addr = addr;
740 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
741 	if (rc)
742 		return U64_MAX;
743 
744 	if (gaudi) {
745 		old_addr = gaudi->hbm_bar_cur_addr;
746 		gaudi->hbm_bar_cur_addr = addr;
747 	}
748 
749 	return old_addr;
750 }
751 
752 static int gaudi_init_iatu(struct hl_device *hdev)
753 {
754 	struct hl_inbound_pci_region inbound_region;
755 	struct hl_outbound_pci_region outbound_region;
756 	int rc;
757 
758 	if (hdev->asic_prop.iatu_done_by_fw)
759 		return 0;
760 
761 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
762 	inbound_region.mode = PCI_BAR_MATCH_MODE;
763 	inbound_region.bar = SRAM_BAR_ID;
764 	inbound_region.addr = SRAM_BASE_ADDR;
765 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
766 	if (rc)
767 		goto done;
768 
769 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
770 	inbound_region.mode = PCI_BAR_MATCH_MODE;
771 	inbound_region.bar = CFG_BAR_ID;
772 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
773 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
774 	if (rc)
775 		goto done;
776 
777 	/* Inbound Region 2 - Bar 4 - Point to HBM */
778 	inbound_region.mode = PCI_BAR_MATCH_MODE;
779 	inbound_region.bar = HBM_BAR_ID;
780 	inbound_region.addr = DRAM_PHYS_BASE;
781 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
782 	if (rc)
783 		goto done;
784 
785 	/* Outbound Region 0 - Point to Host */
786 	outbound_region.addr = HOST_PHYS_BASE;
787 	outbound_region.size = HOST_PHYS_SIZE;
788 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
789 
790 done:
791 	return rc;
792 }
793 
794 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
795 {
796 	return RREG32(mmHW_STATE);
797 }
798 
799 static int gaudi_early_init(struct hl_device *hdev)
800 {
801 	struct asic_fixed_properties *prop = &hdev->asic_prop;
802 	struct pci_dev *pdev = hdev->pdev;
803 	resource_size_t pci_bar_size;
804 	u32 fw_boot_status;
805 	int rc;
806 
807 	rc = gaudi_set_fixed_properties(hdev);
808 	if (rc) {
809 		dev_err(hdev->dev, "Failed setting fixed properties\n");
810 		return rc;
811 	}
812 
813 	/* Check BAR sizes */
814 	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
815 
816 	if (pci_bar_size != SRAM_BAR_SIZE) {
817 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
818 			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
819 		rc = -ENODEV;
820 		goto free_queue_props;
821 	}
822 
823 	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
824 
825 	if (pci_bar_size != CFG_BAR_SIZE) {
826 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
827 			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
828 		rc = -ENODEV;
829 		goto free_queue_props;
830 	}
831 
832 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
833 	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
834 
835 	/* If FW security is enabled at this point it means no access to ELBI */
836 	if (hdev->asic_prop.fw_security_enabled) {
837 		hdev->asic_prop.iatu_done_by_fw = true;
838 
839 		/*
840 		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
841 		 * decision can only be taken based on PCI ID security.
842 		 */
843 		hdev->asic_prop.gic_interrupts_enable = false;
844 		goto pci_init;
845 	}
846 
847 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
848 				&fw_boot_status);
849 	if (rc)
850 		goto free_queue_props;
851 
852 	/* Check whether FW is configuring iATU */
853 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
854 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
855 		hdev->asic_prop.iatu_done_by_fw = true;
856 
857 pci_init:
858 	rc = hl_pci_init(hdev);
859 	if (rc)
860 		goto free_queue_props;
861 
862 	/* Before continuing in the initialization, we need to read the preboot
863 	 * version to determine whether we run with a security-enabled firmware
864 	 */
865 	rc = hl_fw_read_preboot_status(hdev);
866 	if (rc) {
867 		if (hdev->reset_on_preboot_fail)
868 			/* we are already on failure flow, so don't check if hw_fini fails. */
869 			hdev->asic_funcs->hw_fini(hdev, true, false);
870 		goto pci_fini;
871 	}
872 
873 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
874 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
875 		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
876 		if (rc) {
877 			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
878 			goto pci_fini;
879 		}
880 	}
881 
882 	return 0;
883 
884 pci_fini:
885 	hl_pci_fini(hdev);
886 free_queue_props:
887 	kfree(hdev->asic_prop.hw_queues_props);
888 	return rc;
889 }
890 
891 static int gaudi_early_fini(struct hl_device *hdev)
892 {
893 	kfree(hdev->asic_prop.hw_queues_props);
894 	hl_pci_fini(hdev);
895 
896 	return 0;
897 }
898 
899 /**
900  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
901  *
902  * @hdev: pointer to hl_device structure
903  *
904  */
905 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
906 {
907 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
908 	struct asic_fixed_properties *prop = &hdev->asic_prop;
909 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
910 	int rc;
911 
912 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
913 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
914 		struct gaudi_device *gaudi = hdev->asic_specific;
915 
916 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
917 			return 0;
918 
919 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
920 
921 		if (rc)
922 			return rc;
923 
924 		freq = pll_freq_arr[2];
925 	} else {
926 		/* Backward compatibility */
927 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
928 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
929 		nr = RREG32(mmPSOC_CPU_PLL_NR);
930 		nf = RREG32(mmPSOC_CPU_PLL_NF);
931 		od = RREG32(mmPSOC_CPU_PLL_OD);
932 
933 		if (div_sel == DIV_SEL_REF_CLK ||
934 				div_sel == DIV_SEL_DIVIDED_REF) {
935 			if (div_sel == DIV_SEL_REF_CLK)
936 				freq = PLL_REF_CLK;
937 			else
938 				freq = PLL_REF_CLK / (div_fctr + 1);
939 		} else if (div_sel == DIV_SEL_PLL_CLK ||
940 			div_sel == DIV_SEL_DIVIDED_PLL) {
941 			pll_clk = PLL_REF_CLK * (nf + 1) /
942 					((nr + 1) * (od + 1));
943 			if (div_sel == DIV_SEL_PLL_CLK)
944 				freq = pll_clk;
945 			else
946 				freq = pll_clk / (div_fctr + 1);
947 		} else {
948 			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
949 			freq = 0;
950 		}
951 	}
952 
953 	prop->psoc_timestamp_frequency = freq;
954 	prop->psoc_pci_pll_nr = nr;
955 	prop->psoc_pci_pll_nf = nf;
956 	prop->psoc_pci_pll_od = od;
957 	prop->psoc_pci_pll_div_factor = div_fctr;
958 
959 	return 0;
960 }
961 
962 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
963 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
964 {
965 	struct asic_fixed_properties *prop = &hdev->asic_prop;
966 	struct packet_lin_dma *init_tpc_mem_pkt;
967 	struct hl_cs_job *job;
968 	struct hl_cb *cb;
969 	u64 dst_addr;
970 	u32 cb_size, ctl;
971 	u8 tpc_id;
972 	int rc;
973 
974 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
975 	if (!cb)
976 		return -EFAULT;
977 
978 	init_tpc_mem_pkt = cb->kernel_address;
979 	cb_size = sizeof(*init_tpc_mem_pkt);
980 	memset(init_tpc_mem_pkt, 0, cb_size);
981 
982 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
983 
984 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
985 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
986 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
987 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
988 
989 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
990 
991 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
992 
993 	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
994 	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
995 				round_up(prop->sram_user_base_address, SZ_8K));
996 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
997 
998 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
999 	if (!job) {
1000 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1001 		rc = -ENOMEM;
1002 		goto release_cb;
1003 	}
1004 
1005 	job->id = 0;
1006 	job->user_cb = cb;
1007 	atomic_inc(&job->user_cb->cs_cnt);
1008 	job->user_cb_size = cb_size;
1009 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1010 	job->patched_cb = job->user_cb;
1011 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1012 
1013 	hl_debugfs_add_job(hdev, job);
1014 
1015 	rc = gaudi_send_job_on_qman0(hdev, job);
1016 
1017 	if (rc)
1018 		goto free_job;
1019 
1020 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1021 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1022 		if (rc)
1023 			break;
1024 	}
1025 
1026 free_job:
1027 	hl_userptr_delete_list(hdev, &job->userptr_list);
1028 	hl_debugfs_remove_job(hdev, job);
1029 	kfree(job);
1030 	atomic_dec(&cb->cs_cnt);
1031 
1032 release_cb:
1033 	hl_cb_put(cb);
1034 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1035 
1036 	return rc;
1037 }
1038 
1039 /*
1040  * gaudi_init_tpc_mem() - Initialize TPC memories.
1041  * @hdev: Pointer to hl_device structure.
1042  *
1043  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1044  *
1045  * Return: 0 for success, negative value for error.
1046  */
1047 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1048 {
1049 	const struct firmware *fw;
1050 	size_t fw_size;
1051 	void *cpu_addr;
1052 	dma_addr_t dma_handle;
1053 	int rc, count = 5;
1054 
1055 again:
1056 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1057 	if (rc == -EINTR && count-- > 0) {
1058 		msleep(50);
1059 		goto again;
1060 	}
1061 
1062 	if (rc) {
1063 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1064 				GAUDI_TPC_FW_FILE);
1065 		goto out;
1066 	}
1067 
1068 	fw_size = fw->size;
1069 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1070 	if (!cpu_addr) {
1071 		dev_err(hdev->dev,
1072 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1073 			fw_size);
1074 		rc = -ENOMEM;
1075 		goto out;
1076 	}
1077 
1078 	memcpy(cpu_addr, fw->data, fw_size);
1079 
1080 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1081 
1082 	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1083 
1084 out:
1085 	release_firmware(fw);
1086 	return rc;
1087 }
1088 
1089 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1090 {
1091 	struct gaudi_device *gaudi = hdev->asic_specific;
1092 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1093 	struct hl_hw_queue *q;
1094 	u32 i, sob_id, sob_group_id, queue_id;
1095 
1096 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1097 	sob_group_id =
1098 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1099 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1100 
1101 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1102 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1103 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1104 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1105 	}
1106 
1107 	/* Both DMA5 and TPC7 use the same resources since only a single
1108 	 * engine need to participate in the reduction process
1109 	 */
1110 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1111 	q = &hdev->kernel_queues[queue_id];
1112 	q->sync_stream_prop.collective_sob_id =
1113 			sob_id + NIC_NUMBER_OF_ENGINES;
1114 
1115 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1116 	q = &hdev->kernel_queues[queue_id];
1117 	q->sync_stream_prop.collective_sob_id =
1118 			sob_id + NIC_NUMBER_OF_ENGINES;
1119 }
1120 
1121 static void gaudi_sob_group_hw_reset(struct kref *ref)
1122 {
1123 	struct gaudi_hw_sob_group *hw_sob_group =
1124 		container_of(ref, struct gaudi_hw_sob_group, kref);
1125 	struct hl_device *hdev = hw_sob_group->hdev;
1126 	int i;
1127 
1128 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1129 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1130 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1131 
1132 	kref_init(&hw_sob_group->kref);
1133 }
1134 
1135 static void gaudi_sob_group_reset_error(struct kref *ref)
1136 {
1137 	struct gaudi_hw_sob_group *hw_sob_group =
1138 		container_of(ref, struct gaudi_hw_sob_group, kref);
1139 	struct hl_device *hdev = hw_sob_group->hdev;
1140 
1141 	dev_crit(hdev->dev,
1142 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1143 		hw_sob_group->base_sob_id);
1144 }
1145 
1146 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1147 {
1148 	struct gaudi_collective_properties *prop;
1149 	int i;
1150 
1151 	prop = &gaudi->collective_props;
1152 
1153 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1154 
1155 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1156 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1157 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1158 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1159 	/* Set collective engine bit */
1160 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1161 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1162 }
1163 
1164 static int gaudi_collective_init(struct hl_device *hdev)
1165 {
1166 	u32 i, sob_id, reserved_sobs_per_group;
1167 	struct gaudi_collective_properties *prop;
1168 	struct gaudi_device *gaudi;
1169 
1170 	gaudi = hdev->asic_specific;
1171 	prop = &gaudi->collective_props;
1172 	sob_id = hdev->asic_prop.collective_first_sob;
1173 
1174 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1175 	reserved_sobs_per_group =
1176 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1177 
1178 	/* Init SOB groups */
1179 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1180 		prop->hw_sob_group[i].hdev = hdev;
1181 		prop->hw_sob_group[i].base_sob_id = sob_id;
1182 		sob_id += reserved_sobs_per_group;
1183 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1184 	}
1185 
1186 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1187 		prop->next_sob_group_val[i] = 1;
1188 		prop->curr_sob_group_idx[i] = 0;
1189 		gaudi_collective_map_sobs(hdev, i);
1190 	}
1191 
1192 	gaudi_collective_mstr_sob_mask_set(gaudi);
1193 
1194 	return 0;
1195 }
1196 
1197 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1198 {
1199 	struct gaudi_device *gaudi = hdev->asic_specific;
1200 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1201 
1202 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1203 					gaudi_sob_group_hw_reset);
1204 }
1205 
1206 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1207 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1208 {
1209 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1210 	struct gaudi_collective_properties *cprop;
1211 	struct hl_gen_wait_properties wait_prop;
1212 	struct hl_sync_stream_properties *prop;
1213 	struct gaudi_device *gaudi;
1214 
1215 	gaudi = hdev->asic_specific;
1216 	cprop = &gaudi->collective_props;
1217 	queue_id = job->hw_queue_id;
1218 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1219 
1220 	master_sob_base =
1221 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1222 	master_monitor = prop->collective_mstr_mon_id[0];
1223 
1224 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1225 
1226 	dev_dbg(hdev->dev,
1227 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1228 		master_sob_base, cprop->mstr_sob_mask[0],
1229 		cprop->next_sob_group_val[stream],
1230 		master_monitor, queue_id);
1231 
1232 	wait_prop.data = (void *) job->patched_cb;
1233 	wait_prop.sob_base = master_sob_base;
1234 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1235 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1236 	wait_prop.mon_id = master_monitor;
1237 	wait_prop.q_idx = queue_id;
1238 	wait_prop.size = cb_size;
1239 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1240 
1241 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1242 	master_monitor = prop->collective_mstr_mon_id[1];
1243 
1244 	dev_dbg(hdev->dev,
1245 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1246 		master_sob_base, cprop->mstr_sob_mask[1],
1247 		cprop->next_sob_group_val[stream],
1248 		master_monitor, queue_id);
1249 
1250 	wait_prop.sob_base = master_sob_base;
1251 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1252 	wait_prop.mon_id = master_monitor;
1253 	wait_prop.size = cb_size;
1254 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1255 }
1256 
1257 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1258 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1259 {
1260 	struct hl_gen_wait_properties wait_prop;
1261 	struct hl_sync_stream_properties *prop;
1262 	u32 queue_id, cb_size = 0;
1263 
1264 	queue_id = job->hw_queue_id;
1265 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1266 
1267 	if (job->cs->encaps_signals) {
1268 		/* use the encaps signal handle store earlier in the flow
1269 		 * and set the SOB information from the encaps
1270 		 * signals handle
1271 		 */
1272 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1273 						cs_cmpl);
1274 
1275 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1276 				job->cs->sequence,
1277 				cs_cmpl->hw_sob->sob_id,
1278 				cs_cmpl->sob_val);
1279 	}
1280 
1281 	/* Add to wait CBs using slave monitor */
1282 	wait_prop.data = (void *) job->user_cb;
1283 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1284 	wait_prop.sob_mask = 0x1;
1285 	wait_prop.sob_val = cs_cmpl->sob_val;
1286 	wait_prop.mon_id = prop->collective_slave_mon_id;
1287 	wait_prop.q_idx = queue_id;
1288 	wait_prop.size = cb_size;
1289 
1290 	dev_dbg(hdev->dev,
1291 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1292 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1293 		prop->collective_slave_mon_id, queue_id);
1294 
1295 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1296 
1297 	dev_dbg(hdev->dev,
1298 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1299 		prop->collective_sob_id, queue_id);
1300 
1301 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1302 			prop->collective_sob_id, cb_size, false);
1303 }
1304 
1305 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1306 {
1307 	struct hl_cs_compl *signal_cs_cmpl =
1308 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1309 	struct hl_cs_compl *cs_cmpl =
1310 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1311 	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1312 	struct gaudi_collective_properties *cprop;
1313 	u32 stream, queue_id, sob_group_offset;
1314 	struct gaudi_device *gaudi;
1315 	struct hl_device *hdev;
1316 	struct hl_cs_job *job;
1317 	struct hl_ctx *ctx;
1318 
1319 	ctx = cs->ctx;
1320 	hdev = ctx->hdev;
1321 	gaudi = hdev->asic_specific;
1322 	cprop = &gaudi->collective_props;
1323 
1324 	if (cs->encaps_signals) {
1325 		cs_cmpl->hw_sob = handle->hw_sob;
1326 		/* at this checkpoint we only need the hw_sob pointer
1327 		 * for the completion check before start going over the jobs
1328 		 * of the master/slaves, the sob_value will be taken later on
1329 		 * in gaudi_collective_slave_init_job depends on each
1330 		 * job wait offset value.
1331 		 */
1332 		cs_cmpl->sob_val = 0;
1333 	} else {
1334 		/* copy the SOB id and value of the signal CS */
1335 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1336 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1337 	}
1338 
1339 	/* check again if the signal cs already completed.
1340 	 * if yes then don't send any wait cs since the hw_sob
1341 	 * could be in reset already. if signal is not completed
1342 	 * then get refcount to hw_sob to prevent resetting the sob
1343 	 * while wait cs is not submitted.
1344 	 * note that this check is protected by two locks,
1345 	 * hw queue lock and completion object lock,
1346 	 * and the same completion object lock also protects
1347 	 * the hw_sob reset handler function.
1348 	 * The hw_queue lock prevent out of sync of hw_sob
1349 	 * refcount value, changed by signal/wait flows.
1350 	 */
1351 	spin_lock(&signal_cs_cmpl->lock);
1352 
1353 	if (completion_done(&cs->signal_fence->completion)) {
1354 		spin_unlock(&signal_cs_cmpl->lock);
1355 		return -EINVAL;
1356 	}
1357 	/* Increment kref since all slave queues are now waiting on it */
1358 	kref_get(&cs_cmpl->hw_sob->kref);
1359 
1360 	spin_unlock(&signal_cs_cmpl->lock);
1361 
1362 	/* Calculate the stream from collective master queue (1st job) */
1363 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1364 	stream = job->hw_queue_id % 4;
1365 	sob_group_offset =
1366 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1367 
1368 	list_for_each_entry(job, &cs->job_list, cs_node) {
1369 		queue_id = job->hw_queue_id;
1370 
1371 		if (hdev->kernel_queues[queue_id].collective_mode ==
1372 				HL_COLLECTIVE_MASTER)
1373 			gaudi_collective_master_init_job(hdev, job, stream,
1374 						sob_group_offset);
1375 		else
1376 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1377 	}
1378 
1379 	cs_cmpl->sob_group = sob_group_offset;
1380 
1381 	/* Handle sob group kref and wraparound */
1382 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1383 	cprop->next_sob_group_val[stream]++;
1384 
1385 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1386 		/*
1387 		 * Decrement as we reached the max value.
1388 		 * The release function won't be called here as we've
1389 		 * just incremented the refcount.
1390 		 */
1391 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1392 				gaudi_sob_group_reset_error);
1393 		cprop->next_sob_group_val[stream] = 1;
1394 		/* only two SOBs are currently in use */
1395 		cprop->curr_sob_group_idx[stream] =
1396 			(cprop->curr_sob_group_idx[stream] + 1) &
1397 							(HL_RSVD_SOBS - 1);
1398 
1399 		gaudi_collective_map_sobs(hdev, stream);
1400 
1401 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1402 				cprop->curr_sob_group_idx[stream], stream);
1403 	}
1404 
1405 	mb();
1406 	hl_fence_put(cs->signal_fence);
1407 	cs->signal_fence = NULL;
1408 
1409 	return 0;
1410 }
1411 
1412 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1413 {
1414 	u32 cacheline_end, additional_commands;
1415 
1416 	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1417 	additional_commands = sizeof(struct packet_msg_prot) * 2;
1418 
1419 	if (user_cb_size + additional_commands > cacheline_end)
1420 		return cacheline_end - user_cb_size + additional_commands;
1421 	else
1422 		return additional_commands;
1423 }
1424 
1425 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1426 		struct hl_ctx *ctx, struct hl_cs *cs,
1427 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1428 		u32 encaps_signal_offset)
1429 {
1430 	struct hw_queue_properties *hw_queue_prop;
1431 	struct hl_cs_counters_atomic *cntr;
1432 	struct hl_cs_job *job;
1433 	struct hl_cb *cb;
1434 	u32 cb_size;
1435 	bool patched_cb;
1436 
1437 	cntr = &hdev->aggregated_cs_counters;
1438 
1439 	if (mode == HL_COLLECTIVE_MASTER) {
1440 		/* CB size of collective master queue contains
1441 		 * 4 msg short packets for monitor 1 configuration
1442 		 * 1 fence packet
1443 		 * 4 msg short packets for monitor 2 configuration
1444 		 * 1 fence packet
1445 		 * 2 msg prot packets for completion and MSI
1446 		 */
1447 		cb_size = sizeof(struct packet_msg_short) * 8 +
1448 				sizeof(struct packet_fence) * 2 +
1449 				sizeof(struct packet_msg_prot) * 2;
1450 		patched_cb = true;
1451 	} else {
1452 		/* CB size of collective slave queues contains
1453 		 * 4 msg short packets for monitor configuration
1454 		 * 1 fence packet
1455 		 * 1 additional msg short packet for sob signal
1456 		 */
1457 		cb_size = sizeof(struct packet_msg_short) * 5 +
1458 				sizeof(struct packet_fence);
1459 		patched_cb = false;
1460 	}
1461 
1462 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1463 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1464 	if (!job) {
1465 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1466 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1467 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1468 		return -ENOMEM;
1469 	}
1470 
1471 	/* Allocate internal mapped CB for non patched CBs */
1472 	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
1473 	if (!cb) {
1474 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1475 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1476 		kfree(job);
1477 		return -EFAULT;
1478 	}
1479 
1480 	job->id = 0;
1481 	job->cs = cs;
1482 	job->user_cb = cb;
1483 	atomic_inc(&job->user_cb->cs_cnt);
1484 	job->user_cb_size = cb_size;
1485 	job->hw_queue_id = queue_id;
1486 
1487 	/* since its guaranteed to have only one chunk in the collective wait
1488 	 * cs, we can use this chunk to set the encapsulated signal offset
1489 	 * in the jobs.
1490 	 */
1491 	if (cs->encaps_signals)
1492 		job->encaps_sig_wait_offset = encaps_signal_offset;
1493 
1494 	/*
1495 	 * No need in parsing, user CB is the patched CB.
1496 	 * We call hl_cb_destroy() out of two reasons - we don't need
1497 	 * the CB in the CB idr anymore and to decrement its refcount as
1498 	 * it was incremented inside hl_cb_kernel_create().
1499 	 */
1500 	if (patched_cb)
1501 		job->patched_cb = job->user_cb;
1502 	else
1503 		job->patched_cb = NULL;
1504 
1505 	job->job_cb_size = job->user_cb_size;
1506 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1507 
1508 	/* increment refcount as for external queues we get completion */
1509 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1510 		cs_get(cs);
1511 
1512 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1513 
1514 	list_add_tail(&job->cs_node, &cs->job_list);
1515 
1516 	hl_debugfs_add_job(hdev, job);
1517 
1518 	return 0;
1519 }
1520 
1521 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1522 		struct hl_ctx *ctx, struct hl_cs *cs,
1523 		u32 wait_queue_id, u32 collective_engine_id,
1524 		u32 encaps_signal_offset)
1525 {
1526 	struct gaudi_device *gaudi = hdev->asic_specific;
1527 	struct hw_queue_properties *hw_queue_prop;
1528 	u32 queue_id, collective_queue, num_jobs;
1529 	u32 stream, nic_queue, nic_idx = 0;
1530 	bool skip;
1531 	int i, rc = 0;
1532 
1533 	/* Verify wait queue id is configured as master */
1534 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1535 	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1536 		dev_err(hdev->dev,
1537 			"Queue %d is not configured as collective master\n",
1538 			wait_queue_id);
1539 		return -EINVAL;
1540 	}
1541 
1542 	/* Verify engine id is supported */
1543 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1544 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1545 		dev_err(hdev->dev,
1546 			"Collective wait does not support engine %u\n",
1547 			collective_engine_id);
1548 		return -EINVAL;
1549 	}
1550 
1551 	stream = wait_queue_id % 4;
1552 
1553 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1554 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1555 	else
1556 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1557 
1558 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1559 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1560 
1561 	/* First job goes to the collective master queue, it will wait for
1562 	 * the collective slave queues to finish execution.
1563 	 * The synchronization is done using two monitors:
1564 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1565 	 * reduction engine (DMA5/TPC7).
1566 	 *
1567 	 * Rest of the jobs goes to the collective slave queues which will
1568 	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1569 	 */
1570 	for (i = 0 ; i < num_jobs ; i++) {
1571 		if (i == 0) {
1572 			queue_id = wait_queue_id;
1573 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1574 				HL_COLLECTIVE_MASTER, queue_id,
1575 				wait_queue_id, encaps_signal_offset);
1576 		} else {
1577 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1578 				if (gaudi->hw_cap_initialized &
1579 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1580 					skip = false;
1581 				else
1582 					skip = true;
1583 
1584 				queue_id = nic_queue;
1585 				nic_queue += 4;
1586 				nic_idx++;
1587 
1588 				if (skip)
1589 					continue;
1590 			} else {
1591 				queue_id = collective_queue;
1592 			}
1593 
1594 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1595 				HL_COLLECTIVE_SLAVE, queue_id,
1596 				wait_queue_id, encaps_signal_offset);
1597 		}
1598 
1599 		if (rc)
1600 			return rc;
1601 	}
1602 
1603 	return rc;
1604 }
1605 
1606 static int gaudi_late_init(struct hl_device *hdev)
1607 {
1608 	struct gaudi_device *gaudi = hdev->asic_specific;
1609 	int rc;
1610 
1611 	rc = gaudi->cpucp_info_get(hdev);
1612 	if (rc) {
1613 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1614 		return rc;
1615 	}
1616 
1617 	if ((hdev->card_type == cpucp_card_type_pci) &&
1618 			(hdev->nic_ports_mask & 0x3)) {
1619 		dev_info(hdev->dev,
1620 			"PCI card detected, only 8 ports are enabled\n");
1621 		hdev->nic_ports_mask &= ~0x3;
1622 
1623 		/* Stop and disable unused NIC QMANs */
1624 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1625 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1626 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1627 
1628 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1629 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1630 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1631 
1632 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1633 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1634 
1635 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1636 	}
1637 
1638 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1639 	if (rc) {
1640 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1641 		return rc;
1642 	}
1643 
1644 	/* Scrub both SRAM and DRAM */
1645 	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1646 	if (rc)
1647 		goto disable_pci_access;
1648 
1649 	rc = gaudi_fetch_psoc_frequency(hdev);
1650 	if (rc) {
1651 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1652 		goto disable_pci_access;
1653 	}
1654 
1655 	rc = gaudi_mmu_clear_pgt_range(hdev);
1656 	if (rc) {
1657 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1658 		goto disable_pci_access;
1659 	}
1660 
1661 	rc = gaudi_init_tpc_mem(hdev);
1662 	if (rc) {
1663 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1664 		goto disable_pci_access;
1665 	}
1666 
1667 	rc = gaudi_collective_init(hdev);
1668 	if (rc) {
1669 		dev_err(hdev->dev, "Failed to init collective\n");
1670 		goto disable_pci_access;
1671 	}
1672 
1673 	/* We only support a single ASID for the user, so for the sake of optimization, just
1674 	 * initialize the ASID one time during device initialization with the fixed value of 1
1675 	 */
1676 	gaudi_mmu_prepare(hdev, 1);
1677 
1678 	hl_fw_set_pll_profile(hdev);
1679 
1680 	return 0;
1681 
1682 disable_pci_access:
1683 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1684 
1685 	return rc;
1686 }
1687 
/*
 * gaudi_late_fini() - Late teardown stage of the device.
 * @hdev: Pointer to hl_device structure.
 *
 * Only releases the hwmon resources.
 */
static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
1692 
1693 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1694 {
1695 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1696 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1697 	int i, j, rc = 0;
1698 
1699 	/*
1700 	 * The device CPU works with 40-bits addresses, while bit 39 must be set
1701 	 * to '1' when accessing the host.
1702 	 * Bits 49:39 of the full host address are saved for a later
1703 	 * configuration of the HW to perform extension to 50 bits.
1704 	 * Because there is a single HW register that holds the extension bits,
1705 	 * these bits must be identical in all allocated range.
1706 	 */
1707 
1708 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1709 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1710 								&dma_addr_arr[i],
1711 								GFP_KERNEL | __GFP_ZERO);
1712 		if (!virt_addr_arr[i]) {
1713 			rc = -ENOMEM;
1714 			goto free_dma_mem_arr;
1715 		}
1716 
1717 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1718 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1719 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1720 			break;
1721 	}
1722 
1723 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1724 		dev_err(hdev->dev,
1725 			"MSB of CPU accessible DMA memory are not identical in all range\n");
1726 		rc = -EFAULT;
1727 		goto free_dma_mem_arr;
1728 	}
1729 
1730 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1731 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1732 	hdev->cpu_pci_msb_addr =
1733 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1734 
1735 	if (!hdev->asic_prop.fw_security_enabled)
1736 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1737 
1738 free_dma_mem_arr:
1739 	for (j = 0 ; j < i ; j++)
1740 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1741 						dma_addr_arr[j]);
1742 
1743 	return rc;
1744 }
1745 
1746 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1747 {
1748 	struct gaudi_device *gaudi = hdev->asic_specific;
1749 	struct gaudi_internal_qman_info *q;
1750 	u32 i;
1751 
1752 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1753 		q = &gaudi->internal_qmans[i];
1754 		if (!q->pq_kernel_addr)
1755 			continue;
1756 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1757 	}
1758 }
1759 
1760 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1761 {
1762 	struct gaudi_device *gaudi = hdev->asic_specific;
1763 	struct gaudi_internal_qman_info *q;
1764 	int rc, i;
1765 
1766 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1767 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1768 			continue;
1769 
1770 		q = &gaudi->internal_qmans[i];
1771 
1772 		switch (i) {
1773 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1774 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1775 			break;
1776 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1777 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1778 			break;
1779 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1780 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1781 			break;
1782 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1783 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1784 			break;
1785 		default:
1786 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1787 			rc = -EINVAL;
1788 			goto free_internal_qmans_pq_mem;
1789 		}
1790 
1791 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1792 								GFP_KERNEL | __GFP_ZERO);
1793 		if (!q->pq_kernel_addr) {
1794 			rc = -ENOMEM;
1795 			goto free_internal_qmans_pq_mem;
1796 		}
1797 	}
1798 
1799 	return 0;
1800 
1801 free_internal_qmans_pq_mem:
1802 	gaudi_free_internal_qmans_pq_mem(hdev);
1803 	return rc;
1804 }
1805 
1806 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1807 {
1808 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1809 	struct pci_mem_region *region;
1810 
1811 	/* CFG */
1812 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1813 	region->region_base = CFG_BASE;
1814 	region->region_size = CFG_SIZE;
1815 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1816 	region->bar_size = CFG_BAR_SIZE;
1817 	region->bar_id = CFG_BAR_ID;
1818 	region->used = 1;
1819 
1820 	/* SRAM */
1821 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1822 	region->region_base = SRAM_BASE_ADDR;
1823 	region->region_size = SRAM_SIZE;
1824 	region->offset_in_bar = 0;
1825 	region->bar_size = SRAM_BAR_SIZE;
1826 	region->bar_id = SRAM_BAR_ID;
1827 	region->used = 1;
1828 
1829 	/* DRAM */
1830 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1831 	region->region_base = DRAM_PHYS_BASE;
1832 	region->region_size = hdev->asic_prop.dram_size;
1833 	region->offset_in_bar = 0;
1834 	region->bar_size = prop->dram_pci_bar_size;
1835 	region->bar_id = HBM_BAR_ID;
1836 	region->used = 1;
1837 
1838 	/* SP SRAM */
1839 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1840 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1841 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1842 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1843 	region->bar_size = CFG_BAR_SIZE;
1844 	region->bar_id = CFG_BAR_ID;
1845 	region->used = 1;
1846 }
1847 
1848 static int gaudi_sw_init(struct hl_device *hdev)
1849 {
1850 	struct gaudi_device *gaudi;
1851 	u32 i, event_id = 0;
1852 	int rc;
1853 
1854 	/* Allocate device structure */
1855 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1856 	if (!gaudi)
1857 		return -ENOMEM;
1858 
1859 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1860 		if (gaudi_irq_map_table[i].valid) {
1861 			if (event_id == GAUDI_EVENT_SIZE) {
1862 				dev_err(hdev->dev,
1863 					"Event array exceeds the limit of %u events\n",
1864 					GAUDI_EVENT_SIZE);
1865 				rc = -EINVAL;
1866 				goto free_gaudi_device;
1867 			}
1868 
1869 			gaudi->events[event_id++] =
1870 					gaudi_irq_map_table[i].fc_id;
1871 		}
1872 	}
1873 
1874 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1875 
1876 	hdev->asic_specific = gaudi;
1877 
1878 	/* Create DMA pool for small allocations */
1879 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1880 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1881 	if (!hdev->dma_pool) {
1882 		dev_err(hdev->dev, "failed to create DMA pool\n");
1883 		rc = -ENOMEM;
1884 		goto free_gaudi_device;
1885 	}
1886 
1887 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1888 	if (rc)
1889 		goto free_dma_pool;
1890 
1891 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1892 	if (!hdev->cpu_accessible_dma_pool) {
1893 		dev_err(hdev->dev,
1894 			"Failed to create CPU accessible DMA pool\n");
1895 		rc = -ENOMEM;
1896 		goto free_cpu_dma_mem;
1897 	}
1898 
1899 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1900 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1901 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1902 	if (rc) {
1903 		dev_err(hdev->dev,
1904 			"Failed to add memory to CPU accessible DMA pool\n");
1905 		rc = -EFAULT;
1906 		goto free_cpu_accessible_dma_pool;
1907 	}
1908 
1909 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1910 	if (rc)
1911 		goto free_cpu_accessible_dma_pool;
1912 
1913 	spin_lock_init(&gaudi->hw_queues_lock);
1914 
1915 	hdev->supports_sync_stream = true;
1916 	hdev->supports_coresight = true;
1917 	hdev->supports_staged_submission = true;
1918 	hdev->supports_wait_for_multi_cs = true;
1919 
1920 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1921 	hdev->stream_master_qid_arr =
1922 				hdev->asic_funcs->get_stream_master_qid_arr();
1923 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1924 
1925 	return 0;
1926 
1927 free_cpu_accessible_dma_pool:
1928 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1929 free_cpu_dma_mem:
1930 	if (!hdev->asic_prop.fw_security_enabled)
1931 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1932 					hdev->cpu_pci_msb_addr);
1933 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1934 					hdev->cpu_accessible_dma_address);
1935 free_dma_pool:
1936 	dma_pool_destroy(hdev->dma_pool);
1937 free_gaudi_device:
1938 	kfree(gaudi);
1939 	return rc;
1940 }
1941 
1942 static int gaudi_sw_fini(struct hl_device *hdev)
1943 {
1944 	struct gaudi_device *gaudi = hdev->asic_specific;
1945 
1946 	gaudi_free_internal_qmans_pq_mem(hdev);
1947 
1948 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1949 
1950 	if (!hdev->asic_prop.fw_security_enabled)
1951 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1952 					hdev->cpu_pci_msb_addr);
1953 
1954 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1955 					hdev->cpu_accessible_dma_address);
1956 
1957 	dma_pool_destroy(hdev->dma_pool);
1958 
1959 	kfree(gaudi);
1960 
1961 	return 0;
1962 }
1963 
1964 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1965 {
1966 	struct hl_device *hdev = arg;
1967 	int i;
1968 
1969 	if (hdev->disabled)
1970 		return IRQ_HANDLED;
1971 
1972 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1973 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1974 
1975 	hl_irq_handler_eq(irq, &hdev->event_queue);
1976 
1977 	return IRQ_HANDLED;
1978 }
1979 
1980 /*
1981  * For backward compatibility, new MSI interrupts should be set after the
1982  * existing CPU and NIC interrupts.
1983  */
1984 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1985 				bool cpu_eq)
1986 {
1987 	int msi_vec;
1988 
1989 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1990 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1991 				GAUDI_EVENT_QUEUE_MSI_IDX);
1992 
1993 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1994 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1995 
1996 	return pci_irq_vector(hdev->pdev, msi_vec);
1997 }
1998 
1999 static int gaudi_enable_msi_single(struct hl_device *hdev)
2000 {
2001 	int rc, irq;
2002 
2003 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2004 
2005 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2006 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2007 			"gaudi single msi", hdev);
2008 	if (rc)
2009 		dev_err(hdev->dev,
2010 			"Failed to request single MSI IRQ\n");
2011 
2012 	return rc;
2013 }
2014 
2015 static int gaudi_enable_msi(struct hl_device *hdev)
2016 {
2017 	struct gaudi_device *gaudi = hdev->asic_specific;
2018 	int rc;
2019 
2020 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2021 		return 0;
2022 
2023 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2024 	if (rc < 0) {
2025 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2026 		return rc;
2027 	}
2028 
2029 	rc = gaudi_enable_msi_single(hdev);
2030 	if (rc)
2031 		goto free_pci_irq_vectors;
2032 
2033 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2034 
2035 	return 0;
2036 
2037 free_pci_irq_vectors:
2038 	pci_free_irq_vectors(hdev->pdev);
2039 	return rc;
2040 }
2041 
2042 static void gaudi_sync_irqs(struct hl_device *hdev)
2043 {
2044 	struct gaudi_device *gaudi = hdev->asic_specific;
2045 
2046 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2047 		return;
2048 
2049 	/* Wait for all pending IRQs to be finished */
2050 	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2051 }
2052 
2053 static void gaudi_disable_msi(struct hl_device *hdev)
2054 {
2055 	struct gaudi_device *gaudi = hdev->asic_specific;
2056 
2057 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2058 		return;
2059 
2060 	gaudi_sync_irqs(hdev);
2061 	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2062 	pci_free_irq_vectors(hdev->pdev);
2063 
2064 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2065 }
2066 
2067 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2068 {
2069 	struct gaudi_device *gaudi = hdev->asic_specific;
2070 
2071 	if (hdev->asic_prop.fw_security_enabled)
2072 		return;
2073 
2074 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2075 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2076 		return;
2077 
2078 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2079 		return;
2080 
2081 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2082 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2083 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2084 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2085 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2086 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2087 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2088 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2089 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2090 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2091 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2092 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2093 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2094 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2095 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2096 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2097 
2098 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2099 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2100 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2101 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2102 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2103 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2104 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2105 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2106 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2107 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2108 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2109 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2110 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2111 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2112 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2113 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2114 
2115 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2116 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2117 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2118 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2119 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2120 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2121 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2122 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2123 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2124 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2125 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2126 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2127 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2128 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2129 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2130 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2131 
2132 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2133 }
2134 
2135 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2136 {
2137 	struct gaudi_device *gaudi = hdev->asic_specific;
2138 
2139 	if (hdev->asic_prop.fw_security_enabled)
2140 		return;
2141 
2142 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2143 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2144 		return;
2145 
2146 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2147 		return;
2148 
2149 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2150 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2151 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2152 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2153 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2154 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2155 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2156 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2157 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2158 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2159 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2160 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2161 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2162 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2163 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2164 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2165 
2166 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2167 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2168 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2169 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2170 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2171 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2172 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2173 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2174 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2175 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2176 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2177 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2178 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2179 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2180 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2181 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2182 
2183 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2184 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2185 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2186 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2187 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2188 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2189 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2190 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2191 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2192 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2193 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2194 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2195 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2196 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2197 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2198 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2199 
2200 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2201 }
2202 
2203 static void gaudi_init_e2e(struct hl_device *hdev)
2204 {
2205 	if (hdev->asic_prop.fw_security_enabled)
2206 		return;
2207 
2208 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2209 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2210 		return;
2211 
2212 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2213 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2214 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2215 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2216 
2217 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2218 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2219 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2220 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2221 
2222 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2223 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2224 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2225 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2226 
2227 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2228 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2229 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2230 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2231 
2232 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2233 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2234 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2235 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2236 
2237 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2238 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2239 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2240 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2241 
2242 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2243 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2244 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2245 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2246 
2247 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2248 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2249 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2250 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2251 
2252 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2253 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2254 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2255 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2256 
2257 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2258 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2259 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2260 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2261 
2262 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2263 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2264 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2265 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2266 
2267 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2268 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2269 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2270 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2271 
2272 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2273 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2274 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2275 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2276 
2277 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2278 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2279 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2280 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2281 
2282 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2283 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2284 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2285 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2286 
2287 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2288 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2289 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2290 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2291 
2292 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2293 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2294 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2295 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2296 
2297 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2298 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2299 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2300 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2301 
2302 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2303 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2304 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2305 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2306 
2307 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2308 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2309 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2310 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2311 
2312 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2313 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2314 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2315 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2316 
2317 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2318 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2319 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2320 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2321 
2322 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2323 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2324 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2325 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2326 
2327 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2328 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2329 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2330 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2331 
2332 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2333 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2334 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2335 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2336 
2337 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2338 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2339 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2340 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2341 
2342 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2343 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2344 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2345 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2346 
2347 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2348 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2349 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2350 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2351 
2352 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2353 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2354 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2355 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2356 
2357 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2358 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2359 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2360 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2361 
2362 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2363 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2364 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2365 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2366 
2367 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2368 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2369 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2370 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2371 
2372 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2373 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2374 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2375 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2376 
2377 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2378 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2379 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2380 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2381 
2382 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2383 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2384 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2385 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2386 
2387 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2388 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2389 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2390 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2391 
2392 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2393 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2394 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2395 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2396 
2397 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2398 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2399 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2400 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2401 
2402 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2403 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2404 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2405 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2406 
2407 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2408 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2409 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2410 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2411 
2412 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2413 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2414 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2415 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2416 
2417 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2418 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2419 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2420 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2421 
2422 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2423 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2424 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2425 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2426 
2427 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2428 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2429 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2430 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2431 
2432 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2433 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2434 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2435 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2436 
2437 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2438 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2439 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2440 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2441 
2442 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2443 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2444 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2445 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2446 
2447 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2448 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2449 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2450 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2451 }
2452 
2453 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2454 {
2455 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2456 
2457 	if (hdev->asic_prop.fw_security_enabled)
2458 		return;
2459 
2460 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2461 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2462 		return;
2463 
2464 	hbm0_wr = 0x33333333;
2465 	hbm0_rd = 0x77777777;
2466 	hbm1_wr = 0x55555555;
2467 	hbm1_rd = 0xDDDDDDDD;
2468 
2469 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2470 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2471 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2472 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2473 
2474 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2475 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2476 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2477 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2478 
2479 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2480 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2481 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2482 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2483 
2484 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2485 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2486 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2487 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2488 
2489 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2490 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2491 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2492 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2493 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2494 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2495 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2496 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2497 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2498 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2499 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2500 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2501 
2502 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2503 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2504 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2505 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2506 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2507 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2508 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2509 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2510 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2511 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2512 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2513 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2514 }
2515 
2516 static void gaudi_init_golden_registers(struct hl_device *hdev)
2517 {
2518 	u32 tpc_offset;
2519 	int tpc_id, i;
2520 
2521 	gaudi_init_e2e(hdev);
2522 	gaudi_init_hbm_cred(hdev);
2523 
2524 	for (tpc_id = 0, tpc_offset = 0;
2525 				tpc_id < TPC_NUMBER_OF_ENGINES;
2526 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2527 		/* Mask all arithmetic interrupts from TPC */
2528 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2529 		/* Set 16 cache lines */
2530 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2531 				ICACHE_FETCH_LINE_NUM, 2);
2532 	}
2533 
2534 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2535 	for (i = 0 ; i < 128 ; i += 8)
2536 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2537 
2538 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2539 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542 }
2543 
2544 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2545 					int qman_id, dma_addr_t qman_pq_addr)
2546 {
2547 	struct cpu_dyn_regs *dyn_regs =
2548 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2549 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2550 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2551 	u32 q_off, dma_qm_offset;
2552 	u32 dma_qm_err_cfg, irq_handler_offset;
2553 
2554 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2555 
2556 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2557 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2558 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2559 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2560 	so_base_en_lo = lower_32_bits(CFG_BASE +
2561 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2562 	so_base_en_hi = upper_32_bits(CFG_BASE +
2563 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2564 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2565 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2566 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2567 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2568 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2569 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2570 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2571 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2572 
2573 	q_off = dma_qm_offset + qman_id * 4;
2574 
2575 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2576 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2577 
2578 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2579 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2580 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2581 
2582 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2583 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2584 							QMAN_LDMA_SRC_OFFSET);
2585 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2586 							QMAN_LDMA_DST_OFFSET);
2587 
2588 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2589 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2590 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2591 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2592 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2593 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2594 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2595 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2596 
2597 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2598 
2599 	/* The following configuration is needed only once per QMAN */
2600 	if (qman_id == 0) {
2601 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2602 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2603 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2604 
2605 		/* Configure RAZWI IRQ */
2606 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2607 		if (hdev->stop_on_err)
2608 			dma_qm_err_cfg |=
2609 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2610 
2611 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2612 
2613 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2614 			lower_32_bits(CFG_BASE + irq_handler_offset));
2615 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2616 			upper_32_bits(CFG_BASE + irq_handler_offset));
2617 
2618 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2619 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2620 									dma_id);
2621 
2622 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2623 				QM_ARB_ERR_MSG_EN_MASK);
2624 
2625 		/* Set timeout to maximum */
2626 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2627 
2628 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2629 				QMAN_EXTERNAL_MAKE_TRUSTED);
2630 
2631 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2632 	}
2633 }
2634 
2635 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2636 {
2637 	struct cpu_dyn_regs *dyn_regs =
2638 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2639 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2640 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2641 	u32 irq_handler_offset;
2642 
2643 	/* Set to maximum possible according to physical size */
2644 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2645 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2646 
2647 	/* WA for H/W bug H3-2116 */
2648 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2649 
2650 	/* STOP_ON bit implies no completion to operation in case of RAZWI */
2651 	if (hdev->stop_on_err)
2652 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2653 
2654 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2655 
2656 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2657 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2658 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2659 
2660 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2661 		lower_32_bits(CFG_BASE + irq_handler_offset));
2662 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2663 		upper_32_bits(CFG_BASE + irq_handler_offset));
2664 
2665 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2666 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2667 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2668 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2669 	/* If the channel is secured, it should be in MMU bypass mode */
2670 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2671 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2672 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2673 }
2674 
2675 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2676 				u32 enable_mask)
2677 {
2678 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2679 
2680 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2681 }
2682 
2683 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2684 {
2685 	struct gaudi_device *gaudi = hdev->asic_specific;
2686 	struct hl_hw_queue *q;
2687 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2688 
2689 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2690 		return;
2691 
2692 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2693 		dma_id = gaudi_dma_assignment[i];
2694 		/*
2695 		 * For queues after the CPU Q need to add 1 to get the correct
2696 		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
2697 		 * order to get the correct MSI register.
2698 		 */
2699 		if (dma_id > 1) {
2700 			cpu_skip = 1;
2701 			nic_skip = NIC_NUMBER_OF_ENGINES;
2702 		} else {
2703 			cpu_skip = 0;
2704 			nic_skip = 0;
2705 		}
2706 
2707 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2708 			q_idx = 4 * dma_id + j + cpu_skip;
2709 			q = &hdev->kernel_queues[q_idx];
2710 			q->cq_id = cq_id++;
2711 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2712 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2713 						q->bus_address);
2714 		}
2715 
2716 		gaudi_init_dma_core(hdev, dma_id);
2717 
2718 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2719 	}
2720 
2721 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2722 }
2723 
2724 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2725 					int qman_id, u64 qman_base_addr)
2726 {
2727 	struct cpu_dyn_regs *dyn_regs =
2728 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2729 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2730 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2731 	u32 dma_qm_err_cfg, irq_handler_offset;
2732 	u32 q_off, dma_qm_offset;
2733 
2734 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2735 
2736 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2737 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2738 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2739 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2740 	so_base_en_lo = lower_32_bits(CFG_BASE +
2741 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2742 	so_base_en_hi = upper_32_bits(CFG_BASE +
2743 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2744 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2745 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2746 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2747 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2748 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2749 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2750 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2751 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2752 
2753 	q_off = dma_qm_offset + qman_id * 4;
2754 
2755 	if (qman_id < 4) {
2756 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2757 					lower_32_bits(qman_base_addr));
2758 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2759 					upper_32_bits(qman_base_addr));
2760 
2761 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2762 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2763 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2764 
2765 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2766 							QMAN_CPDMA_SIZE_OFFSET);
2767 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2768 							QMAN_CPDMA_SRC_OFFSET);
2769 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2770 							QMAN_CPDMA_DST_OFFSET);
2771 	} else {
2772 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2773 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2774 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2775 
2776 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2777 							QMAN_LDMA_SIZE_OFFSET);
2778 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2779 							QMAN_LDMA_SRC_OFFSET);
2780 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2781 							QMAN_LDMA_DST_OFFSET);
2782 
2783 		/* Configure RAZWI IRQ */
2784 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2785 		if (hdev->stop_on_err)
2786 			dma_qm_err_cfg |=
2787 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2788 
2789 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2790 
2791 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2792 			lower_32_bits(CFG_BASE + irq_handler_offset));
2793 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2794 			upper_32_bits(CFG_BASE + irq_handler_offset));
2795 
2796 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2797 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2798 									dma_id);
2799 
2800 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2801 				QM_ARB_ERR_MSG_EN_MASK);
2802 
2803 		/* Set timeout to maximum */
2804 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2805 
2806 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2807 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2808 				QMAN_INTERNAL_MAKE_TRUSTED);
2809 	}
2810 
2811 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2812 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2813 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2814 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2815 
2816 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2817 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2818 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2819 				mtr_base_ws_lo);
2820 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2821 				mtr_base_ws_hi);
2822 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2823 				so_base_ws_lo);
2824 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2825 				so_base_ws_hi);
2826 	}
2827 }
2828 
2829 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2830 {
2831 	struct gaudi_device *gaudi = hdev->asic_specific;
2832 	struct gaudi_internal_qman_info *q;
2833 	u64 qman_base_addr;
2834 	int i, j, dma_id, internal_q_index;
2835 
2836 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2837 		return;
2838 
2839 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2840 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2841 
2842 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2843 			 /*
2844 			  * Add the CPU queue in order to get the correct queue
2845 			  * number as all internal queue are placed after it
2846 			  */
2847 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2848 
2849 			q = &gaudi->internal_qmans[internal_q_index];
2850 			qman_base_addr = (u64) q->pq_dma_addr;
2851 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2852 						qman_base_addr);
2853 		}
2854 
2855 		/* Initializing lower CP for HBM DMA QMAN */
2856 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2857 
2858 		gaudi_init_dma_core(hdev, dma_id);
2859 
2860 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2861 	}
2862 
2863 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2864 }
2865 
2866 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2867 					int qman_id, u64 qman_base_addr)
2868 {
2869 	struct cpu_dyn_regs *dyn_regs =
2870 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2871 	u32 mtr_base_lo, mtr_base_hi;
2872 	u32 so_base_lo, so_base_hi;
2873 	u32 irq_handler_offset;
2874 	u32 q_off, mme_id;
2875 	u32 mme_qm_err_cfg;
2876 
2877 	mtr_base_lo = lower_32_bits(CFG_BASE +
2878 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2879 	mtr_base_hi = upper_32_bits(CFG_BASE +
2880 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2881 	so_base_lo = lower_32_bits(CFG_BASE +
2882 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2883 	so_base_hi = upper_32_bits(CFG_BASE +
2884 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2885 
2886 	q_off = mme_offset + qman_id * 4;
2887 
2888 	if (qman_id < 4) {
2889 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2890 					lower_32_bits(qman_base_addr));
2891 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2892 					upper_32_bits(qman_base_addr));
2893 
2894 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2895 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2896 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2897 
2898 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2899 							QMAN_CPDMA_SIZE_OFFSET);
2900 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2901 							QMAN_CPDMA_SRC_OFFSET);
2902 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2903 							QMAN_CPDMA_DST_OFFSET);
2904 	} else {
2905 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2906 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2907 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2908 
2909 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2910 							QMAN_LDMA_SIZE_OFFSET);
2911 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2912 							QMAN_LDMA_SRC_OFFSET);
2913 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2914 							QMAN_LDMA_DST_OFFSET);
2915 
2916 		/* Configure RAZWI IRQ */
2917 		mme_id = mme_offset /
2918 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2919 
2920 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2921 		if (hdev->stop_on_err)
2922 			mme_qm_err_cfg |=
2923 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2924 
2925 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2926 
2927 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2928 			lower_32_bits(CFG_BASE + irq_handler_offset));
2929 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2930 			upper_32_bits(CFG_BASE + irq_handler_offset));
2931 
2932 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2933 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2934 									mme_id);
2935 
2936 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2937 				QM_ARB_ERR_MSG_EN_MASK);
2938 
2939 		/* Set timeout to maximum */
2940 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2941 
2942 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2943 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2944 				QMAN_INTERNAL_MAKE_TRUSTED);
2945 	}
2946 
2947 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2948 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2949 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2950 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2951 }
2952 
2953 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2954 {
2955 	struct gaudi_device *gaudi = hdev->asic_specific;
2956 	struct gaudi_internal_qman_info *q;
2957 	u64 qman_base_addr;
2958 	u32 mme_offset;
2959 	int i, internal_q_index;
2960 
2961 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2962 		return;
2963 
2964 	/*
2965 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2966 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2967 	 */
2968 
2969 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2970 
2971 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2972 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2973 		q = &gaudi->internal_qmans[internal_q_index];
2974 		qman_base_addr = (u64) q->pq_dma_addr;
2975 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2976 					qman_base_addr);
2977 		if (i == 3)
2978 			mme_offset = 0;
2979 	}
2980 
2981 	/* Initializing lower CP for MME QMANs */
2982 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2983 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2984 	gaudi_init_mme_qman(hdev, 0, 4, 0);
2985 
2986 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2987 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988 
2989 	gaudi->hw_cap_initialized |= HW_CAP_MME;
2990 }
2991 
2992 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2993 				int qman_id, u64 qman_base_addr)
2994 {
2995 	struct cpu_dyn_regs *dyn_regs =
2996 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2997 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2998 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2999 	u32 tpc_qm_err_cfg, irq_handler_offset;
3000 	u32 q_off, tpc_id;
3001 
3002 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3003 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3004 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3005 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3006 	so_base_en_lo = lower_32_bits(CFG_BASE +
3007 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3008 	so_base_en_hi = upper_32_bits(CFG_BASE +
3009 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3010 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3011 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3012 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3013 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3014 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3015 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3016 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3017 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3018 
3019 	q_off = tpc_offset + qman_id * 4;
3020 
3021 	tpc_id = tpc_offset /
3022 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3023 
3024 	if (qman_id < 4) {
3025 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3026 					lower_32_bits(qman_base_addr));
3027 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3028 					upper_32_bits(qman_base_addr));
3029 
3030 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3031 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3032 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3033 
3034 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3035 							QMAN_CPDMA_SIZE_OFFSET);
3036 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3037 							QMAN_CPDMA_SRC_OFFSET);
3038 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3039 							QMAN_CPDMA_DST_OFFSET);
3040 	} else {
3041 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3042 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3043 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3044 
3045 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3046 							QMAN_LDMA_SIZE_OFFSET);
3047 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3048 							QMAN_LDMA_SRC_OFFSET);
3049 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3050 							QMAN_LDMA_DST_OFFSET);
3051 
3052 		/* Configure RAZWI IRQ */
3053 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3054 		if (hdev->stop_on_err)
3055 			tpc_qm_err_cfg |=
3056 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3057 
3058 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3059 
3060 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3061 			lower_32_bits(CFG_BASE + irq_handler_offset));
3062 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3063 			upper_32_bits(CFG_BASE + irq_handler_offset));
3064 
3065 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3066 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3067 									tpc_id);
3068 
3069 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3070 				QM_ARB_ERR_MSG_EN_MASK);
3071 
3072 		/* Set timeout to maximum */
3073 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3074 
3075 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3076 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3077 				QMAN_INTERNAL_MAKE_TRUSTED);
3078 	}
3079 
3080 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3081 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3082 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3083 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3084 
3085 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3086 	if (tpc_id == 6) {
3087 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3088 				mtr_base_ws_lo);
3089 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3090 				mtr_base_ws_hi);
3091 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3092 				so_base_ws_lo);
3093 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3094 				so_base_ws_hi);
3095 	}
3096 }
3097 
3098 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3099 {
3100 	struct gaudi_device *gaudi = hdev->asic_specific;
3101 	struct gaudi_internal_qman_info *q;
3102 	u64 qman_base_addr;
3103 	u32 so_base_hi, tpc_offset = 0;
3104 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3105 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3106 	int i, tpc_id, internal_q_index;
3107 
3108 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3109 		return;
3110 
3111 	so_base_hi = upper_32_bits(CFG_BASE +
3112 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3113 
3114 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3115 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3116 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3117 						tpc_id * QMAN_STREAMS + i;
3118 			q = &gaudi->internal_qmans[internal_q_index];
3119 			qman_base_addr = (u64) q->pq_dma_addr;
3120 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3121 						qman_base_addr);
3122 
3123 			if (i == 3) {
3124 				/* Initializing lower CP for TPC QMAN */
3125 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3126 
3127 				/* Enable the QMAN and TPC channel */
3128 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3129 						QMAN_TPC_ENABLE);
3130 			}
3131 		}
3132 
3133 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3134 				so_base_hi);
3135 
3136 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3137 
3138 		gaudi->hw_cap_initialized |=
3139 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3140 	}
3141 }
3142 
3143 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3144 				int qman_id, u64 qman_base_addr, int nic_id)
3145 {
3146 	struct cpu_dyn_regs *dyn_regs =
3147 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3148 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3149 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3150 	u32 nic_qm_err_cfg, irq_handler_offset;
3151 	u32 q_off;
3152 
3153 	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3154 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3155 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3156 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3157 	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3158 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3159 	so_base_en_hi = upper_32_bits(CFG_BASE +
3160 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3161 	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3162 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3163 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3164 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3165 	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3166 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3167 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3168 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3169 
3170 	q_off = nic_offset + qman_id * 4;
3171 
3172 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3173 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3174 
3175 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3176 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3177 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3178 
3179 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3180 							QMAN_LDMA_SIZE_OFFSET);
3181 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3182 							QMAN_LDMA_SRC_OFFSET);
3183 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3184 							QMAN_LDMA_DST_OFFSET);
3185 
3186 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3187 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3188 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3189 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3190 
3191 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3192 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3193 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3194 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3195 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3196 
3197 	if (qman_id == 0) {
3198 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3199 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3200 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3201 
3202 		/* Configure RAZWI IRQ */
3203 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3204 		if (hdev->stop_on_err)
3205 			nic_qm_err_cfg |=
3206 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3207 
3208 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3209 
3210 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3211 			lower_32_bits(CFG_BASE + irq_handler_offset));
3212 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3213 			upper_32_bits(CFG_BASE + irq_handler_offset));
3214 
3215 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3216 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3217 									nic_id);
3218 
3219 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3220 				QM_ARB_ERR_MSG_EN_MASK);
3221 
3222 		/* Set timeout to maximum */
3223 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3224 
3225 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3226 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3227 				QMAN_INTERNAL_MAKE_TRUSTED);
3228 	}
3229 }
3230 
3231 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3232 {
3233 	struct gaudi_device *gaudi = hdev->asic_specific;
3234 	struct gaudi_internal_qman_info *q;
3235 	u64 qman_base_addr;
3236 	u32 nic_offset = 0;
3237 	u32 nic_delta_between_qmans =
3238 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3239 	u32 nic_delta_between_nics =
3240 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3241 	int i, nic_id, internal_q_index;
3242 
3243 	if (!hdev->nic_ports_mask)
3244 		return;
3245 
3246 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3247 		return;
3248 
3249 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3250 
3251 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3252 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3253 			nic_offset += nic_delta_between_qmans;
3254 			if (nic_id & 1) {
3255 				nic_offset -= (nic_delta_between_qmans * 2);
3256 				nic_offset += nic_delta_between_nics;
3257 			}
3258 			continue;
3259 		}
3260 
3261 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3262 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3263 						nic_id * QMAN_STREAMS + i;
3264 			q = &gaudi->internal_qmans[internal_q_index];
3265 			qman_base_addr = (u64) q->pq_dma_addr;
3266 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3267 						qman_base_addr, nic_id);
3268 		}
3269 
3270 		/* Enable the QMAN */
3271 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3272 
3273 		nic_offset += nic_delta_between_qmans;
3274 		if (nic_id & 1) {
3275 			nic_offset -= (nic_delta_between_qmans * 2);
3276 			nic_offset += nic_delta_between_nics;
3277 		}
3278 
3279 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3280 	}
3281 }
3282 
3283 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3284 {
3285 	struct gaudi_device *gaudi = hdev->asic_specific;
3286 
3287 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3288 		return;
3289 
3290 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3291 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3292 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3293 }
3294 
3295 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3296 {
3297 	struct gaudi_device *gaudi = hdev->asic_specific;
3298 
3299 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3300 		return;
3301 
3302 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3303 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3304 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3305 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3306 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3307 }
3308 
3309 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3310 {
3311 	struct gaudi_device *gaudi = hdev->asic_specific;
3312 
3313 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3314 		return;
3315 
3316 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3317 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3318 }
3319 
3320 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3321 {
3322 	struct gaudi_device *gaudi = hdev->asic_specific;
3323 	u32 tpc_offset = 0;
3324 	int tpc_id;
3325 
3326 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3327 		return;
3328 
3329 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3330 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3331 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3332 	}
3333 }
3334 
3335 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3336 {
3337 	struct gaudi_device *gaudi = hdev->asic_specific;
3338 	u32 nic_mask, nic_offset = 0;
3339 	u32 nic_delta_between_qmans =
3340 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3341 	u32 nic_delta_between_nics =
3342 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3343 	int nic_id;
3344 
3345 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3346 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3347 
3348 		if (gaudi->hw_cap_initialized & nic_mask)
3349 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3350 
3351 		nic_offset += nic_delta_between_qmans;
3352 		if (nic_id & 1) {
3353 			nic_offset -= (nic_delta_between_qmans * 2);
3354 			nic_offset += nic_delta_between_nics;
3355 		}
3356 	}
3357 }
3358 
3359 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3360 {
3361 	struct gaudi_device *gaudi = hdev->asic_specific;
3362 
3363 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3364 		return;
3365 
3366 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3367 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3368 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3369 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3370 }
3371 
3372 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3373 {
3374 	struct gaudi_device *gaudi = hdev->asic_specific;
3375 
3376 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3377 		return;
3378 
3379 	/* Stop CPs of HBM DMA QMANs */
3380 
3381 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3382 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3383 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3384 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 }
3387 
3388 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3389 {
3390 	struct gaudi_device *gaudi = hdev->asic_specific;
3391 
3392 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3393 		return;
3394 
3395 	/* Stop CPs of MME QMANs */
3396 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3397 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3398 }
3399 
3400 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3401 {
3402 	struct gaudi_device *gaudi = hdev->asic_specific;
3403 
3404 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3405 		return;
3406 
3407 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3408 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3409 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3410 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 }
3416 
3417 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3418 {
3419 	struct gaudi_device *gaudi = hdev->asic_specific;
3420 
3421 	/* Stop upper CPs of QMANs */
3422 
3423 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3424 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3425 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3426 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3427 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3428 
3429 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3430 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3431 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3432 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3433 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3434 
3435 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3436 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3437 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3438 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3439 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3440 
3441 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3442 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3443 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3444 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3445 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3446 
3447 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3448 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3449 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3450 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3451 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3452 
3453 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3454 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3455 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3456 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3457 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3458 
3459 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3460 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3461 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3462 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3463 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3464 
3465 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3466 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3467 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3468 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3469 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3470 
3471 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3472 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3473 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3474 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3475 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3476 
3477 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3478 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3479 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3480 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3481 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3482 }
3483 
3484 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3485 {
3486 	struct gaudi_device *gaudi = hdev->asic_specific;
3487 
3488 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3489 		return;
3490 
3491 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3492 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3493 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3494 }
3495 
3496 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3497 {
3498 	struct gaudi_device *gaudi = hdev->asic_specific;
3499 
3500 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3501 		return;
3502 
3503 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3504 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3505 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3506 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 }
3509 
3510 static void gaudi_mme_stall(struct hl_device *hdev)
3511 {
3512 	struct gaudi_device *gaudi = hdev->asic_specific;
3513 
3514 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3515 		return;
3516 
3517 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3518 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3519 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3520 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3521 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3522 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534 }
3535 
3536 static void gaudi_tpc_stall(struct hl_device *hdev)
3537 {
3538 	struct gaudi_device *gaudi = hdev->asic_specific;
3539 
3540 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3541 		return;
3542 
3543 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3544 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3545 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 }
3552 
3553 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3554 {
3555 	u32 qman_offset;
3556 	int i;
3557 
3558 	if (hdev->asic_prop.fw_security_enabled)
3559 		return;
3560 
3561 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3562 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3563 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3564 
3565 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3566 	}
3567 
3568 	WREG32(mmMME0_QM_CGM_CFG, 0);
3569 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3570 	WREG32(mmMME2_QM_CGM_CFG, 0);
3571 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3572 
3573 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3574 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3575 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3576 
3577 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3578 	}
3579 }
3580 
3581 static void gaudi_enable_timestamp(struct hl_device *hdev)
3582 {
3583 	/* Disable the timestamp counter */
3584 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3585 
3586 	/* Zero the lower/upper parts of the 64-bit counter */
3587 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3588 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3589 
3590 	/* Enable the counter */
3591 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3592 }
3593 
3594 static void gaudi_disable_timestamp(struct hl_device *hdev)
3595 {
3596 	/* Disable the timestamp counter */
3597 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3598 }
3599 
3600 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3601 {
3602 	u32 wait_timeout_ms;
3603 
3604 	if (hdev->pldm)
3605 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3606 	else
3607 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3608 
3609 	if (fw_reset)
3610 		goto skip_engines;
3611 
3612 	gaudi_stop_nic_qmans(hdev);
3613 	gaudi_stop_mme_qmans(hdev);
3614 	gaudi_stop_tpc_qmans(hdev);
3615 	gaudi_stop_hbm_dma_qmans(hdev);
3616 	gaudi_stop_pci_dma_qmans(hdev);
3617 
3618 	msleep(wait_timeout_ms);
3619 
3620 	gaudi_pci_dma_stall(hdev);
3621 	gaudi_hbm_dma_stall(hdev);
3622 	gaudi_tpc_stall(hdev);
3623 	gaudi_mme_stall(hdev);
3624 
3625 	msleep(wait_timeout_ms);
3626 
3627 	gaudi_disable_nic_qmans(hdev);
3628 	gaudi_disable_mme_qmans(hdev);
3629 	gaudi_disable_tpc_qmans(hdev);
3630 	gaudi_disable_hbm_dma_qmans(hdev);
3631 	gaudi_disable_pci_dma_qmans(hdev);
3632 
3633 	gaudi_disable_timestamp(hdev);
3634 
3635 skip_engines:
3636 	gaudi_disable_msi(hdev);
3637 }
3638 
3639 static int gaudi_mmu_init(struct hl_device *hdev)
3640 {
3641 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3642 	struct gaudi_device *gaudi = hdev->asic_specific;
3643 	u64 hop0_addr;
3644 	int rc, i;
3645 
3646 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3647 		return 0;
3648 
3649 	for (i = 0 ; i < prop->max_asid ; i++) {
3650 		hop0_addr = prop->mmu_pgt_addr +
3651 				(i * prop->mmu_hop_table_size);
3652 
3653 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3654 		if (rc) {
3655 			dev_err(hdev->dev,
3656 				"failed to set hop0 addr for asid %d\n", i);
3657 			return rc;
3658 		}
3659 	}
3660 
3661 	/* init MMU cache manage page */
3662 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3663 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3664 
3665 	/* mem cache invalidation */
3666 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3667 
3668 	rc = hl_mmu_invalidate_cache(hdev, true, 0);
3669 	if (rc)
3670 		return rc;
3671 
3672 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3673 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3674 
3675 	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3676 
3677 	/*
3678 	 * The H/W expects the first PI after init to be 1. After wraparound
3679 	 * we'll write 0.
3680 	 */
3681 	gaudi->mmu_cache_inv_pi = 1;
3682 
3683 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3684 
3685 	return 0;
3686 }
3687 
3688 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3689 {
3690 	void __iomem *dst;
3691 
3692 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3693 
3694 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3695 }
3696 
3697 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3698 {
3699 	void __iomem *dst;
3700 
3701 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3702 
3703 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3704 }
3705 
3706 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3707 {
3708 	struct dynamic_fw_load_mgr *dynamic_loader;
3709 	struct cpu_dyn_regs *dyn_regs;
3710 
3711 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3712 
3713 	/*
3714 	 * here we update initial values for few specific dynamic regs (as
3715 	 * before reading the first descriptor from FW those value has to be
3716 	 * hard-coded) in later stages of the protocol those values will be
3717 	 * updated automatically by reading the FW descriptor so data there
3718 	 * will always be up-to-date
3719 	 */
3720 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3721 	dyn_regs->kmd_msg_to_cpu =
3722 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3723 	dyn_regs->cpu_cmd_status_to_host =
3724 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3725 
3726 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3727 }
3728 
3729 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3730 {
3731 	struct static_fw_load_mgr *static_loader;
3732 
3733 	static_loader = &hdev->fw_loader.static_loader;
3734 
3735 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3736 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3737 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3738 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3739 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3740 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3741 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3742 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3743 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3744 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3745 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3746 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3747 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3748 			GAUDI_PLDM_RESET_WAIT_MSEC :
3749 			GAUDI_CPU_RESET_WAIT_MSEC;
3750 }
3751 
3752 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3753 {
3754 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3755 
3756 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3757 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3758 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3759 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3760 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3761 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3762 }
3763 
3764 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3765 {
3766 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3767 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3768 
3769 	/* fill common fields */
3770 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3771 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3772 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3773 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3774 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3775 	fw_loader->skip_bmc = !hdev->bmc_enable;
3776 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3777 	fw_loader->dram_bar_id = HBM_BAR_ID;
3778 
3779 	if (prop->dynamic_fw_load)
3780 		gaudi_init_dynamic_firmware_loader(hdev);
3781 	else
3782 		gaudi_init_static_firmware_loader(hdev);
3783 }
3784 
3785 static int gaudi_init_cpu(struct hl_device *hdev)
3786 {
3787 	struct gaudi_device *gaudi = hdev->asic_specific;
3788 	int rc;
3789 
3790 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3791 		return 0;
3792 
3793 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3794 		return 0;
3795 
3796 	/*
3797 	 * The device CPU works with 40 bits addresses.
3798 	 * This register sets the extension to 50 bits.
3799 	 */
3800 	if (!hdev->asic_prop.fw_security_enabled)
3801 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3802 
3803 	rc = hl_fw_init_cpu(hdev);
3804 
3805 	if (rc)
3806 		return rc;
3807 
3808 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3809 
3810 	return 0;
3811 }
3812 
3813 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3814 {
3815 	struct cpu_dyn_regs *dyn_regs =
3816 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3817 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3818 	struct gaudi_device *gaudi = hdev->asic_specific;
3819 	u32 status, irq_handler_offset;
3820 	struct hl_eq *eq;
3821 	struct hl_hw_queue *cpu_pq =
3822 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3823 	int err;
3824 
3825 	if (!hdev->cpu_queues_enable)
3826 		return 0;
3827 
3828 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3829 		return 0;
3830 
3831 	eq = &hdev->event_queue;
3832 
3833 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3834 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3835 
3836 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3837 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3838 
3839 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3840 			lower_32_bits(hdev->cpu_accessible_dma_address));
3841 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3842 			upper_32_bits(hdev->cpu_accessible_dma_address));
3843 
3844 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3845 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3846 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3847 
3848 	/* Used for EQ CI */
3849 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3850 
3851 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3852 
3853 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3854 
3855 	irq_handler_offset = prop->gic_interrupts_enable ?
3856 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3857 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3858 
3859 	WREG32(irq_handler_offset,
3860 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3861 
3862 	err = hl_poll_timeout(
3863 		hdev,
3864 		mmCPU_IF_QUEUE_INIT,
3865 		status,
3866 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3867 		1000,
3868 		cpu_timeout);
3869 
3870 	if (err) {
3871 		dev_err(hdev->dev,
3872 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3873 		return -EIO;
3874 	}
3875 
3876 	/* update FW application security bits */
3877 	if (prop->fw_cpu_boot_dev_sts0_valid)
3878 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3879 	if (prop->fw_cpu_boot_dev_sts1_valid)
3880 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3881 
3882 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3883 	return 0;
3884 }
3885 
3886 static void gaudi_pre_hw_init(struct hl_device *hdev)
3887 {
3888 	/* Perform read from the device to make sure device is up */
3889 	RREG32(mmHW_STATE);
3890 
3891 	if (!hdev->asic_prop.fw_security_enabled) {
3892 		/* Set the access through PCI bars (Linux driver only) as
3893 		 * secured
3894 		 */
3895 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3896 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3897 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3898 
3899 		/* Perform read to flush the waiting writes to ensure
3900 		 * configuration was set in the device
3901 		 */
3902 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3903 	}
3904 
3905 	/*
3906 	 * Let's mark in the H/W that we have reached this point. We check
3907 	 * this value in the reset_before_init function to understand whether
3908 	 * we need to reset the chip before doing H/W init. This register is
3909 	 * cleared by the H/W upon H/W reset
3910 	 */
3911 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3912 }
3913 
3914 static int gaudi_hw_init(struct hl_device *hdev)
3915 {
3916 	struct gaudi_device *gaudi = hdev->asic_specific;
3917 	int rc;
3918 
3919 	gaudi_pre_hw_init(hdev);
3920 
3921 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3922 	 * So we set it here and if anyone tries to move it later to
3923 	 * a different address, there will be an error
3924 	 */
3925 	if (hdev->asic_prop.iatu_done_by_fw)
3926 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3927 
3928 	/*
3929 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
3930 	 * base address of dram
3931 	 */
3932 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3933 		dev_err(hdev->dev,
3934 			"failed to map HBM bar to DRAM base address\n");
3935 		return -EIO;
3936 	}
3937 
3938 	rc = gaudi_init_cpu(hdev);
3939 	if (rc) {
3940 		dev_err(hdev->dev, "failed to initialize CPU\n");
3941 		return rc;
3942 	}
3943 
3944 	/* In case the clock gating was enabled in preboot we need to disable
3945 	 * it here before touching the MME/TPC registers.
3946 	 */
3947 	gaudi_disable_clock_gating(hdev);
3948 
3949 	/* SRAM scrambler must be initialized after CPU is running from HBM */
3950 	gaudi_init_scrambler_sram(hdev);
3951 
3952 	/* This is here just in case we are working without CPU */
3953 	gaudi_init_scrambler_hbm(hdev);
3954 
3955 	gaudi_init_golden_registers(hdev);
3956 
3957 	rc = gaudi_mmu_init(hdev);
3958 	if (rc)
3959 		return rc;
3960 
3961 	gaudi_init_security(hdev);
3962 
3963 	gaudi_init_pci_dma_qmans(hdev);
3964 
3965 	gaudi_init_hbm_dma_qmans(hdev);
3966 
3967 	gaudi_init_mme_qmans(hdev);
3968 
3969 	gaudi_init_tpc_qmans(hdev);
3970 
3971 	gaudi_init_nic_qmans(hdev);
3972 
3973 	gaudi_enable_timestamp(hdev);
3974 
3975 	/* MSI must be enabled before CPU queues and NIC are initialized */
3976 	rc = gaudi_enable_msi(hdev);
3977 	if (rc)
3978 		goto disable_queues;
3979 
3980 	/* must be called after MSI was enabled */
3981 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3982 	if (rc) {
3983 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3984 			rc);
3985 		goto disable_msi;
3986 	}
3987 
3988 	/* Perform read from the device to flush all configuration */
3989 	RREG32(mmHW_STATE);
3990 
3991 	return 0;
3992 
3993 disable_msi:
3994 	gaudi_disable_msi(hdev);
3995 disable_queues:
3996 	gaudi_disable_mme_qmans(hdev);
3997 	gaudi_disable_pci_dma_qmans(hdev);
3998 
3999 	return rc;
4000 }
4001 
4002 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4003 {
4004 	struct cpu_dyn_regs *dyn_regs =
4005 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4006 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4007 	struct gaudi_device *gaudi = hdev->asic_specific;
4008 	bool driver_performs_reset;
4009 
4010 	if (!hard_reset) {
4011 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4012 		return 0;
4013 	}
4014 
4015 	if (hdev->pldm) {
4016 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4017 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4018 	} else {
4019 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4020 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4021 	}
4022 
4023 	if (fw_reset) {
4024 		dev_dbg(hdev->dev,
4025 			"Firmware performs HARD reset, going to wait %dms\n",
4026 			reset_timeout_ms);
4027 
4028 		goto skip_reset;
4029 	}
4030 
4031 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4032 					!hdev->asic_prop.hard_reset_done_by_fw);
4033 
4034 	/* Set device to handle FLR by H/W as we will put the device CPU to
4035 	 * halt mode
4036 	 */
4037 	if (driver_performs_reset)
4038 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4039 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4040 
4041 	/* If linux is loaded in the device CPU we need to communicate with it
4042 	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4043 	 * registers in case of old F/Ws
4044 	 */
4045 	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4046 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4047 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4048 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4049 
4050 		WREG32(irq_handler_offset,
4051 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4052 
4053 		/* This is a hail-mary attempt to revive the card in the small chance that the
4054 		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4055 		 * In that case, triggering reset through GIC won't help. We need to trigger the
4056 		 * reset as if Linux wasn't loaded.
4057 		 *
4058 		 * We do it only if the reset cause was HB, because that would be the indication
4059 		 * of such an event.
4060 		 *
4061 		 * In case watchdog hasn't expired but we still got HB, then this won't do any
4062 		 * damage.
4063 		 */
4064 		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4065 			if (hdev->asic_prop.hard_reset_done_by_fw)
4066 				hl_fw_ask_hard_reset_without_linux(hdev);
4067 			else
4068 				hl_fw_ask_halt_machine_without_linux(hdev);
4069 		}
4070 	} else {
4071 		if (hdev->asic_prop.hard_reset_done_by_fw)
4072 			hl_fw_ask_hard_reset_without_linux(hdev);
4073 		else
4074 			hl_fw_ask_halt_machine_without_linux(hdev);
4075 	}
4076 
4077 	if (driver_performs_reset) {
4078 
4079 		/* Configure the reset registers. Must be done as early as
4080 		 * possible in case we fail during H/W initialization
4081 		 */
4082 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4083 						(CFG_RST_H_DMA_MASK |
4084 						CFG_RST_H_MME_MASK |
4085 						CFG_RST_H_SM_MASK |
4086 						CFG_RST_H_TPC_7_MASK));
4087 
4088 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4089 
4090 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4091 						(CFG_RST_H_HBM_MASK |
4092 						CFG_RST_H_TPC_7_MASK |
4093 						CFG_RST_H_NIC_MASK |
4094 						CFG_RST_H_SM_MASK |
4095 						CFG_RST_H_DMA_MASK |
4096 						CFG_RST_H_MME_MASK |
4097 						CFG_RST_H_CPU_MASK |
4098 						CFG_RST_H_MMU_MASK));
4099 
4100 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4101 						(CFG_RST_L_IF_MASK |
4102 						CFG_RST_L_PSOC_MASK |
4103 						CFG_RST_L_TPC_MASK));
4104 
4105 		msleep(cpu_timeout_ms);
4106 
4107 		/* Tell ASIC not to re-initialize PCIe */
4108 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4109 
4110 		/* Restart BTL/BLR upon hard-reset */
4111 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4112 
4113 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4114 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4115 
4116 		dev_dbg(hdev->dev,
4117 			"Issued HARD reset command, going to wait %dms\n",
4118 			reset_timeout_ms);
4119 	} else {
4120 		dev_dbg(hdev->dev,
4121 			"Firmware performs HARD reset, going to wait %dms\n",
4122 			reset_timeout_ms);
4123 	}
4124 
4125 skip_reset:
4126 	/*
4127 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4128 	 * itself is in reset. Need to wait until the reset is deasserted
4129 	 */
4130 	msleep(reset_timeout_ms);
4131 
4132 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4133 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4134 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4135 		return -ETIMEDOUT;
4136 	}
4137 
4138 	if (gaudi) {
4139 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4140 						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4141 						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4142 						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4143 						HW_CAP_HBM_SCRAMBLER);
4144 
4145 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4146 
4147 		hdev->device_cpu_is_halted = false;
4148 	}
4149 	return 0;
4150 }
4151 
4152 static int gaudi_suspend(struct hl_device *hdev)
4153 {
4154 	int rc;
4155 
4156 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4157 	if (rc)
4158 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4159 
4160 	return rc;
4161 }
4162 
/*
 * gaudi_resume() - re-initialize the iATU when the device resumes.
 * @hdev: habanalabs device structure.
 *
 * Return: the value returned by gaudi_init_iatu().
 */
static int gaudi_resume(struct hl_device *hdev)
{
	int rc = gaudi_init_iatu(hdev);

	return rc;
}
4167 
4168 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4169 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4170 {
4171 	int rc;
4172 
4173 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4174 			VM_DONTCOPY | VM_NORESERVE);
4175 
4176 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4177 				(dma_addr - HOST_PHYS_BASE), size);
4178 	if (rc)
4179 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4180 
4181 	return rc;
4182 }
4183 
4184 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4185 {
4186 	struct cpu_dyn_regs *dyn_regs =
4187 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4188 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4189 	struct gaudi_device *gaudi = hdev->asic_specific;
4190 	bool invalid_queue = false;
4191 	int dma_id;
4192 
4193 	switch (hw_queue_id) {
4194 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4195 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4196 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4197 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4198 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4199 		break;
4200 
4201 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4202 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4203 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4204 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4205 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4206 		break;
4207 
4208 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4209 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4210 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4211 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4212 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4213 		break;
4214 
4215 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4216 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4217 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4218 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4219 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4220 		break;
4221 
4222 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4223 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4224 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4225 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4226 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4227 		break;
4228 
4229 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4230 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4231 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4232 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4233 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4234 		break;
4235 
4236 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4237 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4238 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4239 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4240 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4241 		break;
4242 
4243 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4244 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4245 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4246 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4247 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4248 		break;
4249 
4250 	case GAUDI_QUEUE_ID_CPU_PQ:
4251 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4252 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4253 		else
4254 			invalid_queue = true;
4255 		break;
4256 
4257 	case GAUDI_QUEUE_ID_MME_0_0:
4258 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4259 		break;
4260 
4261 	case GAUDI_QUEUE_ID_MME_0_1:
4262 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4263 		break;
4264 
4265 	case GAUDI_QUEUE_ID_MME_0_2:
4266 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4267 		break;
4268 
4269 	case GAUDI_QUEUE_ID_MME_0_3:
4270 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4271 		break;
4272 
4273 	case GAUDI_QUEUE_ID_MME_1_0:
4274 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4275 		break;
4276 
4277 	case GAUDI_QUEUE_ID_MME_1_1:
4278 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4279 		break;
4280 
4281 	case GAUDI_QUEUE_ID_MME_1_2:
4282 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4283 		break;
4284 
4285 	case GAUDI_QUEUE_ID_MME_1_3:
4286 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4287 		break;
4288 
4289 	case GAUDI_QUEUE_ID_TPC_0_0:
4290 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4291 		break;
4292 
4293 	case GAUDI_QUEUE_ID_TPC_0_1:
4294 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4295 		break;
4296 
4297 	case GAUDI_QUEUE_ID_TPC_0_2:
4298 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4299 		break;
4300 
4301 	case GAUDI_QUEUE_ID_TPC_0_3:
4302 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4303 		break;
4304 
4305 	case GAUDI_QUEUE_ID_TPC_1_0:
4306 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4307 		break;
4308 
4309 	case GAUDI_QUEUE_ID_TPC_1_1:
4310 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4311 		break;
4312 
4313 	case GAUDI_QUEUE_ID_TPC_1_2:
4314 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4315 		break;
4316 
4317 	case GAUDI_QUEUE_ID_TPC_1_3:
4318 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4319 		break;
4320 
4321 	case GAUDI_QUEUE_ID_TPC_2_0:
4322 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4323 		break;
4324 
4325 	case GAUDI_QUEUE_ID_TPC_2_1:
4326 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4327 		break;
4328 
4329 	case GAUDI_QUEUE_ID_TPC_2_2:
4330 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4331 		break;
4332 
4333 	case GAUDI_QUEUE_ID_TPC_2_3:
4334 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4335 		break;
4336 
4337 	case GAUDI_QUEUE_ID_TPC_3_0:
4338 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4339 		break;
4340 
4341 	case GAUDI_QUEUE_ID_TPC_3_1:
4342 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4343 		break;
4344 
4345 	case GAUDI_QUEUE_ID_TPC_3_2:
4346 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4347 		break;
4348 
4349 	case GAUDI_QUEUE_ID_TPC_3_3:
4350 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4351 		break;
4352 
4353 	case GAUDI_QUEUE_ID_TPC_4_0:
4354 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4355 		break;
4356 
4357 	case GAUDI_QUEUE_ID_TPC_4_1:
4358 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4359 		break;
4360 
4361 	case GAUDI_QUEUE_ID_TPC_4_2:
4362 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4363 		break;
4364 
4365 	case GAUDI_QUEUE_ID_TPC_4_3:
4366 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4367 		break;
4368 
4369 	case GAUDI_QUEUE_ID_TPC_5_0:
4370 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4371 		break;
4372 
4373 	case GAUDI_QUEUE_ID_TPC_5_1:
4374 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4375 		break;
4376 
4377 	case GAUDI_QUEUE_ID_TPC_5_2:
4378 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4379 		break;
4380 
4381 	case GAUDI_QUEUE_ID_TPC_5_3:
4382 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4383 		break;
4384 
4385 	case GAUDI_QUEUE_ID_TPC_6_0:
4386 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4387 		break;
4388 
4389 	case GAUDI_QUEUE_ID_TPC_6_1:
4390 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4391 		break;
4392 
4393 	case GAUDI_QUEUE_ID_TPC_6_2:
4394 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4395 		break;
4396 
4397 	case GAUDI_QUEUE_ID_TPC_6_3:
4398 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4399 		break;
4400 
4401 	case GAUDI_QUEUE_ID_TPC_7_0:
4402 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4403 		break;
4404 
4405 	case GAUDI_QUEUE_ID_TPC_7_1:
4406 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4407 		break;
4408 
4409 	case GAUDI_QUEUE_ID_TPC_7_2:
4410 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4411 		break;
4412 
4413 	case GAUDI_QUEUE_ID_TPC_7_3:
4414 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4415 		break;
4416 
4417 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4418 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4419 			invalid_queue = true;
4420 
4421 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4422 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4423 		break;
4424 
4425 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4426 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4427 			invalid_queue = true;
4428 
4429 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4430 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4431 		break;
4432 
4433 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4434 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4435 			invalid_queue = true;
4436 
4437 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4438 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4439 		break;
4440 
4441 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4442 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4443 			invalid_queue = true;
4444 
4445 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4446 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4447 		break;
4448 
4449 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4450 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4451 			invalid_queue = true;
4452 
4453 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4454 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4455 		break;
4456 
4457 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4458 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4459 			invalid_queue = true;
4460 
4461 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4462 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4463 		break;
4464 
4465 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4466 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4467 			invalid_queue = true;
4468 
4469 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4470 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4471 		break;
4472 
4473 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4474 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4475 			invalid_queue = true;
4476 
4477 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4478 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4479 		break;
4480 
4481 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4482 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4483 			invalid_queue = true;
4484 
4485 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4486 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4487 		break;
4488 
4489 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4490 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4491 			invalid_queue = true;
4492 
4493 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4494 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4495 		break;
4496 
4497 	default:
4498 		invalid_queue = true;
4499 	}
4500 
4501 	if (invalid_queue) {
4502 		/* Should never get here */
4503 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4504 			hw_queue_id);
4505 		return;
4506 	}
4507 
4508 	db_value = pi;
4509 
4510 	/* ring the doorbell */
4511 	WREG32(db_reg_offset, db_value);
4512 
4513 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4514 		/* make sure device CPU will read latest data from host */
4515 		mb();
4516 
4517 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4518 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4519 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4520 
4521 		WREG32(irq_handler_offset,
4522 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4523 	}
4524 }
4525 
4526 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4527 				struct hl_bd *bd)
4528 {
4529 	__le64 *pbd = (__le64 *) bd;
4530 
4531 	/* The QMANs are on the host memory so a simple copy suffice */
4532 	pqe[0] = pbd[0];
4533 	pqe[1] = pbd[1];
4534 }
4535 
4536 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4537 					dma_addr_t *dma_handle, gfp_t flags)
4538 {
4539 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4540 						dma_handle, flags);
4541 
4542 	/* Shift to the device's base physical address of host memory */
4543 	if (kernel_addr)
4544 		*dma_handle += HOST_PHYS_BASE;
4545 
4546 	return kernel_addr;
4547 }
4548 
4549 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4550 		void *cpu_addr, dma_addr_t dma_handle)
4551 {
4552 	/* Cancel the device's base physical address of host memory */
4553 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4554 
4555 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4556 }
4557 
4558 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4559 {
4560 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4561 	u64 cur_addr = prop->dram_user_base_address;
4562 	u32 chunk_size, busy;
4563 	int rc, dma_id;
4564 
4565 	while (cur_addr < prop->dram_end_address) {
4566 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4567 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4568 
4569 			chunk_size =
4570 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4571 
4572 			dev_dbg(hdev->dev,
4573 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4574 				cur_addr, cur_addr + chunk_size);
4575 
4576 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4577 					lower_32_bits(val));
4578 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4579 					upper_32_bits(val));
4580 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4581 						lower_32_bits(cur_addr));
4582 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4583 						upper_32_bits(cur_addr));
4584 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4585 					chunk_size);
4586 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4587 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4588 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4589 
4590 			cur_addr += chunk_size;
4591 
4592 			if (cur_addr == prop->dram_end_address)
4593 				break;
4594 		}
4595 
4596 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4597 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4598 
4599 			rc = hl_poll_timeout(
4600 				hdev,
4601 				mmDMA0_CORE_STS0 + dma_offset,
4602 				busy,
4603 				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4604 				1000,
4605 				HBM_SCRUBBING_TIMEOUT_US);
4606 
4607 			if (rc) {
4608 				dev_err(hdev->dev,
4609 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4610 					dma_id);
4611 				return -EIO;
4612 			}
4613 		}
4614 	}
4615 
4616 	return 0;
4617 }
4618 
4619 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4620 {
4621 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4622 	u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4623 			min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4624 	u64 addr, size, val = hdev->memory_scrub_val;
4625 	ktime_t timeout;
4626 	int rc = 0;
4627 
4628 	if (!hdev->memory_scrub)
4629 		return 0;
4630 
4631 	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4632 	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4633 		if (ktime_compare(ktime_get(), timeout) > 0) {
4634 			dev_err(hdev->dev, "waiting for idle timeout\n");
4635 			return -ETIMEDOUT;
4636 		}
4637 		usleep_range((1000 >> 2) + 1, 1000);
4638 	}
4639 
4640 	/* Scrub SRAM */
4641 	addr = prop->sram_user_base_address;
4642 	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4643 
4644 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4645 			addr, addr + size, val);
4646 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4647 	if (rc) {
4648 		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4649 		return rc;
4650 	}
4651 
4652 	/* Scrub HBM using all DMA channels in parallel */
4653 	rc = gaudi_scrub_device_dram(hdev, val);
4654 	if (rc) {
4655 		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4656 		return rc;
4657 	}
4658 
4659 	return 0;
4660 }
4661 
4662 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4663 				u32 queue_id, dma_addr_t *dma_handle,
4664 				u16 *queue_len)
4665 {
4666 	struct gaudi_device *gaudi = hdev->asic_specific;
4667 	struct gaudi_internal_qman_info *q;
4668 
4669 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4670 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4671 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4672 		return NULL;
4673 	}
4674 
4675 	q = &gaudi->internal_qmans[queue_id];
4676 	*dma_handle = q->pq_dma_addr;
4677 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4678 
4679 	return q->pq_kernel_addr;
4680 }
4681 
4682 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4683 				u16 len, u32 timeout, u64 *result)
4684 {
4685 	struct gaudi_device *gaudi = hdev->asic_specific;
4686 
4687 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4688 		if (result)
4689 			*result = 0;
4690 		return 0;
4691 	}
4692 
4693 	if (!timeout)
4694 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4695 
4696 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4697 						timeout, result);
4698 }
4699 
4700 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4701 {
4702 	struct packet_msg_prot *fence_pkt;
4703 	dma_addr_t pkt_dma_addr;
4704 	u32 fence_val, tmp, timeout_usec;
4705 	dma_addr_t fence_dma_addr;
4706 	u32 *fence_ptr;
4707 	int rc;
4708 
4709 	if (hdev->pldm)
4710 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4711 	else
4712 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4713 
4714 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4715 
4716 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4717 	if (!fence_ptr) {
4718 		dev_err(hdev->dev,
4719 			"Failed to allocate memory for H/W queue %d testing\n",
4720 			hw_queue_id);
4721 		return -ENOMEM;
4722 	}
4723 
4724 	*fence_ptr = 0;
4725 
4726 	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4727 						&pkt_dma_addr);
4728 	if (!fence_pkt) {
4729 		dev_err(hdev->dev,
4730 			"Failed to allocate packet for H/W queue %d testing\n",
4731 			hw_queue_id);
4732 		rc = -ENOMEM;
4733 		goto free_fence_ptr;
4734 	}
4735 
4736 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4737 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4738 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4739 
4740 	fence_pkt->ctl = cpu_to_le32(tmp);
4741 	fence_pkt->value = cpu_to_le32(fence_val);
4742 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4743 
4744 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4745 					sizeof(struct packet_msg_prot),
4746 					pkt_dma_addr);
4747 	if (rc) {
4748 		dev_err(hdev->dev,
4749 			"Failed to send fence packet to H/W queue %d\n",
4750 			hw_queue_id);
4751 		goto free_pkt;
4752 	}
4753 
4754 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4755 					1000, timeout_usec, true);
4756 
4757 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4758 
4759 	if (rc == -ETIMEDOUT) {
4760 		dev_err(hdev->dev,
4761 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4762 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4763 		rc = -EIO;
4764 	}
4765 
4766 free_pkt:
4767 	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4768 free_fence_ptr:
4769 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4770 	return rc;
4771 }
4772 
4773 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4774 {
4775 	struct gaudi_device *gaudi = hdev->asic_specific;
4776 
4777 	/*
4778 	 * check capability here as send_cpu_message() won't update the result
4779 	 * value if no capability
4780 	 */
4781 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4782 		return 0;
4783 
4784 	return hl_fw_test_cpu_queue(hdev);
4785 }
4786 
4787 static int gaudi_test_queues(struct hl_device *hdev)
4788 {
4789 	int i, rc, ret_val = 0;
4790 
4791 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4792 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4793 			rc = gaudi_test_queue(hdev, i);
4794 			if (rc)
4795 				ret_val = -EINVAL;
4796 		}
4797 	}
4798 
4799 	rc = gaudi_test_cpu_queue(hdev);
4800 	if (rc)
4801 		ret_val = -EINVAL;
4802 
4803 	return ret_val;
4804 }
4805 
4806 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4807 		gfp_t mem_flags, dma_addr_t *dma_handle)
4808 {
4809 	void *kernel_addr;
4810 
4811 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4812 		return NULL;
4813 
4814 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4815 
4816 	/* Shift to the device's base physical address of host memory */
4817 	if (kernel_addr)
4818 		*dma_handle += HOST_PHYS_BASE;
4819 
4820 	return kernel_addr;
4821 }
4822 
4823 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4824 			dma_addr_t dma_addr)
4825 {
4826 	/* Cancel the device's base physical address of host memory */
4827 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4828 
4829 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4830 }
4831 
4832 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4833 					size_t size, dma_addr_t *dma_handle)
4834 {
4835 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4836 }
4837 
4838 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4839 						size_t size, void *vaddr)
4840 {
4841 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4842 }
4843 
4844 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4845 {
4846 	struct scatterlist *sg, *sg_next_iter;
4847 	u32 count, dma_desc_cnt;
4848 	u64 len, len_next;
4849 	dma_addr_t addr, addr_next;
4850 
4851 	dma_desc_cnt = 0;
4852 
4853 	for_each_sgtable_dma_sg(sgt, sg, count) {
4854 		len = sg_dma_len(sg);
4855 		addr = sg_dma_address(sg);
4856 
4857 		if (len == 0)
4858 			break;
4859 
4860 		while ((count + 1) < sgt->nents) {
4861 			sg_next_iter = sg_next(sg);
4862 			len_next = sg_dma_len(sg_next_iter);
4863 			addr_next = sg_dma_address(sg_next_iter);
4864 
4865 			if (len_next == 0)
4866 				break;
4867 
4868 			if ((addr + len == addr_next) &&
4869 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4870 				len += len_next;
4871 				count++;
4872 				sg = sg_next_iter;
4873 			} else {
4874 				break;
4875 			}
4876 		}
4877 
4878 		dma_desc_cnt++;
4879 	}
4880 
4881 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4882 }
4883 
4884 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4885 				struct hl_cs_parser *parser,
4886 				struct packet_lin_dma *user_dma_pkt,
4887 				u64 addr, enum dma_data_direction dir)
4888 {
4889 	struct hl_userptr *userptr;
4890 	int rc;
4891 
4892 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4893 			parser->job_userptr_list, &userptr))
4894 		goto already_pinned;
4895 
4896 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4897 	if (!userptr)
4898 		return -ENOMEM;
4899 
4900 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4901 				userptr);
4902 	if (rc)
4903 		goto free_userptr;
4904 
4905 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4906 
4907 	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4908 	if (rc) {
4909 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4910 		goto unpin_memory;
4911 	}
4912 
4913 	userptr->dma_mapped = true;
4914 	userptr->dir = dir;
4915 
4916 already_pinned:
4917 	parser->patched_cb_size +=
4918 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4919 
4920 	return 0;
4921 
4922 unpin_memory:
4923 	list_del(&userptr->job_node);
4924 	hl_unpin_host_memory(hdev, userptr);
4925 free_userptr:
4926 	kfree(userptr);
4927 	return rc;
4928 }
4929 
4930 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4931 				struct hl_cs_parser *parser,
4932 				struct packet_lin_dma *user_dma_pkt,
4933 				bool src_in_host)
4934 {
4935 	enum dma_data_direction dir;
4936 	bool skip_host_mem_pin = false, user_memset;
4937 	u64 addr;
4938 	int rc = 0;
4939 
4940 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4941 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4942 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4943 
4944 	if (src_in_host) {
4945 		if (user_memset)
4946 			skip_host_mem_pin = true;
4947 
4948 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4949 		dir = DMA_TO_DEVICE;
4950 		addr = le64_to_cpu(user_dma_pkt->src_addr);
4951 	} else {
4952 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4953 		dir = DMA_FROM_DEVICE;
4954 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4955 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4956 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4957 	}
4958 
4959 	if (skip_host_mem_pin)
4960 		parser->patched_cb_size += sizeof(*user_dma_pkt);
4961 	else
4962 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4963 						addr, dir);
4964 
4965 	return rc;
4966 }
4967 
4968 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4969 				struct hl_cs_parser *parser,
4970 				struct packet_lin_dma *user_dma_pkt)
4971 {
4972 	bool src_in_host = false;
4973 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4974 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4975 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4976 
4977 	dev_dbg(hdev->dev, "DMA packet details:\n");
4978 	dev_dbg(hdev->dev, "source == 0x%llx\n",
4979 				le64_to_cpu(user_dma_pkt->src_addr));
4980 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4981 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4982 
4983 	/*
4984 	 * Special handling for DMA with size 0. Bypass all validations
4985 	 * because no transactions will be done except for WR_COMP, which
4986 	 * is not a security issue
4987 	 */
4988 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
4989 		parser->patched_cb_size += sizeof(*user_dma_pkt);
4990 		return 0;
4991 	}
4992 
4993 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4994 		src_in_host = true;
4995 
4996 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4997 						src_in_host);
4998 }
4999 
5000 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5001 					struct hl_cs_parser *parser,
5002 					struct packet_load_and_exe *user_pkt)
5003 {
5004 	u32 cfg;
5005 
5006 	cfg = le32_to_cpu(user_pkt->cfg);
5007 
5008 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5009 		dev_err(hdev->dev,
5010 			"User not allowed to use Load and Execute\n");
5011 		return -EPERM;
5012 	}
5013 
5014 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5015 
5016 	return 0;
5017 }
5018 
5019 static int gaudi_validate_cb(struct hl_device *hdev,
5020 			struct hl_cs_parser *parser, bool is_mmu)
5021 {
5022 	u32 cb_parsed_length = 0;
5023 	int rc = 0;
5024 
5025 	parser->patched_cb_size = 0;
5026 
5027 	/* cb_user_size is more than 0 so loop will always be executed */
5028 	while (cb_parsed_length < parser->user_cb_size) {
5029 		enum packet_id pkt_id;
5030 		u16 pkt_size;
5031 		struct gaudi_packet *user_pkt;
5032 
5033 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5034 
5035 		pkt_id = (enum packet_id) (
5036 				(le64_to_cpu(user_pkt->header) &
5037 				PACKET_HEADER_PACKET_ID_MASK) >>
5038 					PACKET_HEADER_PACKET_ID_SHIFT);
5039 
5040 		if (!validate_packet_id(pkt_id)) {
5041 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5042 			rc = -EINVAL;
5043 			break;
5044 		}
5045 
5046 		pkt_size = gaudi_packet_sizes[pkt_id];
5047 		cb_parsed_length += pkt_size;
5048 		if (cb_parsed_length > parser->user_cb_size) {
5049 			dev_err(hdev->dev,
5050 				"packet 0x%x is out of CB boundary\n", pkt_id);
5051 			rc = -EINVAL;
5052 			break;
5053 		}
5054 
5055 		switch (pkt_id) {
5056 		case PACKET_MSG_PROT:
5057 			dev_err(hdev->dev,
5058 				"User not allowed to use MSG_PROT\n");
5059 			rc = -EPERM;
5060 			break;
5061 
5062 		case PACKET_CP_DMA:
5063 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5064 			rc = -EPERM;
5065 			break;
5066 
5067 		case PACKET_STOP:
5068 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5069 			rc = -EPERM;
5070 			break;
5071 
5072 		case PACKET_WREG_BULK:
5073 			dev_err(hdev->dev,
5074 				"User not allowed to use WREG_BULK\n");
5075 			rc = -EPERM;
5076 			break;
5077 
5078 		case PACKET_LOAD_AND_EXE:
5079 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5080 				(struct packet_load_and_exe *) user_pkt);
5081 			break;
5082 
5083 		case PACKET_LIN_DMA:
5084 			parser->contains_dma_pkt = true;
5085 			if (is_mmu)
5086 				parser->patched_cb_size += pkt_size;
5087 			else
5088 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5089 					(struct packet_lin_dma *) user_pkt);
5090 			break;
5091 
5092 		case PACKET_WREG_32:
5093 		case PACKET_MSG_LONG:
5094 		case PACKET_MSG_SHORT:
5095 		case PACKET_REPEAT:
5096 		case PACKET_FENCE:
5097 		case PACKET_NOP:
5098 		case PACKET_ARB_POINT:
5099 			parser->patched_cb_size += pkt_size;
5100 			break;
5101 
5102 		default:
5103 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5104 				pkt_id);
5105 			rc = -EINVAL;
5106 			break;
5107 		}
5108 
5109 		if (rc)
5110 			break;
5111 	}
5112 
5113 	/*
5114 	 * The new CB should have space at the end for two MSG_PROT packets:
5115 	 * 1. Optional NOP padding for cacheline alignment
5116 	 * 2. A packet that will act as a completion packet
5117 	 * 3. A packet that will generate MSI interrupt
5118 	 */
5119 	if (parser->completion)
5120 		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5121 			parser->patched_cb_size);
5122 
5123 	return rc;
5124 }
5125 
5126 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5127 				struct hl_cs_parser *parser,
5128 				struct packet_lin_dma *user_dma_pkt,
5129 				struct packet_lin_dma *new_dma_pkt,
5130 				u32 *new_dma_pkt_size)
5131 {
5132 	struct hl_userptr *userptr;
5133 	struct scatterlist *sg, *sg_next_iter;
5134 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5135 	u64 len, len_next;
5136 	dma_addr_t dma_addr, dma_addr_next;
5137 	u64 device_memory_addr, addr;
5138 	enum dma_data_direction dir;
5139 	struct sg_table *sgt;
5140 	bool src_in_host = false;
5141 	bool skip_host_mem_pin = false;
5142 	bool user_memset;
5143 
5144 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5145 
5146 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5147 		src_in_host = true;
5148 
5149 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5150 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5151 
5152 	if (src_in_host) {
5153 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5154 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5155 		dir = DMA_TO_DEVICE;
5156 		if (user_memset)
5157 			skip_host_mem_pin = true;
5158 	} else {
5159 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5160 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5161 		dir = DMA_FROM_DEVICE;
5162 	}
5163 
5164 	if ((!skip_host_mem_pin) &&
5165 		(!hl_userptr_is_pinned(hdev, addr,
5166 					le32_to_cpu(user_dma_pkt->tsize),
5167 					parser->job_userptr_list, &userptr))) {
5168 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5169 				addr, user_dma_pkt->tsize);
5170 		return -EFAULT;
5171 	}
5172 
5173 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5174 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5175 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5176 		return 0;
5177 	}
5178 
5179 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5180 
5181 	sgt = userptr->sgt;
5182 	dma_desc_cnt = 0;
5183 
5184 	for_each_sgtable_dma_sg(sgt, sg, count) {
5185 		len = sg_dma_len(sg);
5186 		dma_addr = sg_dma_address(sg);
5187 
5188 		if (len == 0)
5189 			break;
5190 
5191 		while ((count + 1) < sgt->nents) {
5192 			sg_next_iter = sg_next(sg);
5193 			len_next = sg_dma_len(sg_next_iter);
5194 			dma_addr_next = sg_dma_address(sg_next_iter);
5195 
5196 			if (len_next == 0)
5197 				break;
5198 
5199 			if ((dma_addr + len == dma_addr_next) &&
5200 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5201 				len += len_next;
5202 				count++;
5203 				sg = sg_next_iter;
5204 			} else {
5205 				break;
5206 			}
5207 		}
5208 
5209 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5210 		if (likely(dma_desc_cnt))
5211 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5212 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5213 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5214 		new_dma_pkt->tsize = cpu_to_le32(len);
5215 
5216 		if (dir == DMA_TO_DEVICE) {
5217 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5218 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5219 		} else {
5220 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5221 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5222 		}
5223 
5224 		if (!user_memset)
5225 			device_memory_addr += len;
5226 		dma_desc_cnt++;
5227 		new_dma_pkt++;
5228 	}
5229 
5230 	if (!dma_desc_cnt) {
5231 		dev_err(hdev->dev,
5232 			"Error of 0 SG entries when patching DMA packet\n");
5233 		return -EFAULT;
5234 	}
5235 
5236 	/* Fix the last dma packet - wrcomp must be as user set it */
5237 	new_dma_pkt--;
5238 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5239 
5240 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5241 
5242 	return 0;
5243 }
5244 
5245 static int gaudi_patch_cb(struct hl_device *hdev,
5246 				struct hl_cs_parser *parser)
5247 {
5248 	u32 cb_parsed_length = 0;
5249 	u32 cb_patched_cur_length = 0;
5250 	int rc = 0;
5251 
5252 	/* cb_user_size is more than 0 so loop will always be executed */
5253 	while (cb_parsed_length < parser->user_cb_size) {
5254 		enum packet_id pkt_id;
5255 		u16 pkt_size;
5256 		u32 new_pkt_size = 0;
5257 		struct gaudi_packet *user_pkt, *kernel_pkt;
5258 
5259 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5260 		kernel_pkt = parser->patched_cb->kernel_address +
5261 					cb_patched_cur_length;
5262 
5263 		pkt_id = (enum packet_id) (
5264 				(le64_to_cpu(user_pkt->header) &
5265 				PACKET_HEADER_PACKET_ID_MASK) >>
5266 					PACKET_HEADER_PACKET_ID_SHIFT);
5267 
5268 		if (!validate_packet_id(pkt_id)) {
5269 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5270 			rc = -EINVAL;
5271 			break;
5272 		}
5273 
5274 		pkt_size = gaudi_packet_sizes[pkt_id];
5275 		cb_parsed_length += pkt_size;
5276 		if (cb_parsed_length > parser->user_cb_size) {
5277 			dev_err(hdev->dev,
5278 				"packet 0x%x is out of CB boundary\n", pkt_id);
5279 			rc = -EINVAL;
5280 			break;
5281 		}
5282 
5283 		switch (pkt_id) {
5284 		case PACKET_LIN_DMA:
5285 			rc = gaudi_patch_dma_packet(hdev, parser,
5286 					(struct packet_lin_dma *) user_pkt,
5287 					(struct packet_lin_dma *) kernel_pkt,
5288 					&new_pkt_size);
5289 			cb_patched_cur_length += new_pkt_size;
5290 			break;
5291 
5292 		case PACKET_MSG_PROT:
5293 			dev_err(hdev->dev,
5294 				"User not allowed to use MSG_PROT\n");
5295 			rc = -EPERM;
5296 			break;
5297 
5298 		case PACKET_CP_DMA:
5299 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5300 			rc = -EPERM;
5301 			break;
5302 
5303 		case PACKET_STOP:
5304 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5305 			rc = -EPERM;
5306 			break;
5307 
5308 		case PACKET_WREG_32:
5309 		case PACKET_WREG_BULK:
5310 		case PACKET_MSG_LONG:
5311 		case PACKET_MSG_SHORT:
5312 		case PACKET_REPEAT:
5313 		case PACKET_FENCE:
5314 		case PACKET_NOP:
5315 		case PACKET_ARB_POINT:
5316 		case PACKET_LOAD_AND_EXE:
5317 			memcpy(kernel_pkt, user_pkt, pkt_size);
5318 			cb_patched_cur_length += pkt_size;
5319 			break;
5320 
5321 		default:
5322 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5323 				pkt_id);
5324 			rc = -EINVAL;
5325 			break;
5326 		}
5327 
5328 		if (rc)
5329 			break;
5330 	}
5331 
5332 	return rc;
5333 }
5334 
5335 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5336 		struct hl_cs_parser *parser)
5337 {
5338 	u64 handle;
5339 	u32 patched_cb_size;
5340 	struct hl_cb *user_cb;
5341 	int rc;
5342 
5343 	/*
5344 	 * The new CB should have space at the end for two MSG_PROT packets:
5345 	 * 1. Optional NOP padding for cacheline alignment
5346 	 * 2. A packet that will act as a completion packet
5347 	 * 3. A packet that will generate MSI interrupt
5348 	 */
5349 	if (parser->completion)
5350 		parser->patched_cb_size = parser->user_cb_size +
5351 				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5352 	else
5353 		parser->patched_cb_size = parser->user_cb_size;
5354 
5355 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5356 				parser->patched_cb_size, false, false,
5357 				&handle);
5358 
5359 	if (rc) {
5360 		dev_err(hdev->dev,
5361 			"Failed to allocate patched CB for DMA CS %d\n",
5362 			rc);
5363 		return rc;
5364 	}
5365 
5366 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5367 	/* hl_cb_get should never fail */
5368 	if (!parser->patched_cb) {
5369 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5370 		rc = -EFAULT;
5371 		goto out;
5372 	}
5373 
5374 	/*
5375 	 * We are protected from overflow because the check
5376 	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5377 	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5378 	 *
5379 	 * There is no option to reach here without going through that check because:
5380 	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5381 	 *    an external queue.
5382 	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5383 	 */
5384 	memcpy(parser->patched_cb->kernel_address,
5385 		parser->user_cb->kernel_address,
5386 		parser->user_cb_size);
5387 
5388 	patched_cb_size = parser->patched_cb_size;
5389 
5390 	/* Validate patched CB instead of user CB */
5391 	user_cb = parser->user_cb;
5392 	parser->user_cb = parser->patched_cb;
5393 	rc = gaudi_validate_cb(hdev, parser, true);
5394 	parser->user_cb = user_cb;
5395 
5396 	if (rc) {
5397 		hl_cb_put(parser->patched_cb);
5398 		goto out;
5399 	}
5400 
5401 	if (patched_cb_size != parser->patched_cb_size) {
5402 		dev_err(hdev->dev, "user CB size mismatch\n");
5403 		hl_cb_put(parser->patched_cb);
5404 		rc = -EINVAL;
5405 		goto out;
5406 	}
5407 
5408 out:
5409 	/*
5410 	 * Always call cb destroy here because we still have 1 reference
5411 	 * to it by calling cb_get earlier. After the job will be completed,
5412 	 * cb_put will release it, but here we want to remove it from the
5413 	 * idr
5414 	 */
5415 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5416 
5417 	return rc;
5418 }
5419 
5420 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5421 		struct hl_cs_parser *parser)
5422 {
5423 	u64 handle;
5424 	int rc;
5425 
5426 	rc = gaudi_validate_cb(hdev, parser, false);
5427 
5428 	if (rc)
5429 		goto free_userptr;
5430 
5431 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5432 				parser->patched_cb_size, false, false,
5433 				&handle);
5434 	if (rc) {
5435 		dev_err(hdev->dev,
5436 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5437 		goto free_userptr;
5438 	}
5439 
5440 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5441 	/* hl_cb_get should never fail here */
5442 	if (!parser->patched_cb) {
5443 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5444 		rc = -EFAULT;
5445 		goto out;
5446 	}
5447 
5448 	rc = gaudi_patch_cb(hdev, parser);
5449 
5450 	if (rc)
5451 		hl_cb_put(parser->patched_cb);
5452 
5453 out:
5454 	/*
5455 	 * Always call cb destroy here because we still have 1 reference
5456 	 * to it by calling cb_get earlier. After the job will be completed,
5457 	 * cb_put will release it, but here we want to remove it from the
5458 	 * idr
5459 	 */
5460 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5461 
5462 free_userptr:
5463 	if (rc)
5464 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5465 	return rc;
5466 }
5467 
5468 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5469 					struct hl_cs_parser *parser)
5470 {
5471 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5472 	struct gaudi_device *gaudi = hdev->asic_specific;
5473 	u32 nic_queue_offset, nic_mask_q_id;
5474 
5475 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5476 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5477 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5478 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5479 
5480 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5481 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5482 			return -EINVAL;
5483 		}
5484 	}
5485 
5486 	/* For internal queue jobs just check if CB address is valid */
5487 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5488 					parser->user_cb_size,
5489 					asic_prop->sram_user_base_address,
5490 					asic_prop->sram_end_address))
5491 		return 0;
5492 
5493 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5494 					parser->user_cb_size,
5495 					asic_prop->dram_user_base_address,
5496 					asic_prop->dram_end_address))
5497 		return 0;
5498 
5499 	/* PMMU and HPMMU addresses are equal, check only one of them */
5500 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5501 					parser->user_cb_size,
5502 					asic_prop->pmmu.start_addr,
5503 					asic_prop->pmmu.end_addr))
5504 		return 0;
5505 
5506 	dev_err(hdev->dev,
5507 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5508 		parser->user_cb, parser->user_cb_size);
5509 
5510 	return -EFAULT;
5511 }
5512 
5513 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5514 {
5515 	struct gaudi_device *gaudi = hdev->asic_specific;
5516 
5517 	if (parser->queue_type == QUEUE_TYPE_INT)
5518 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5519 
5520 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5521 		return gaudi_parse_cb_mmu(hdev, parser);
5522 	else
5523 		return gaudi_parse_cb_no_mmu(hdev, parser);
5524 }
5525 
5526 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5527 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5528 				u32 msi_vec, bool eb)
5529 {
5530 	struct packet_msg_prot *cq_pkt;
5531 	struct packet_nop *cq_padding;
5532 	u64 msi_addr;
5533 	u32 tmp;
5534 
5535 	cq_padding = kernel_address + original_len;
5536 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5537 
5538 	while ((void *)cq_padding < (void *)cq_pkt) {
5539 		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5540 		cq_padding++;
5541 	}
5542 
5543 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5544 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5545 
5546 	if (eb)
5547 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5548 
5549 	cq_pkt->ctl = cpu_to_le32(tmp);
5550 	cq_pkt->value = cpu_to_le32(cq_val);
5551 	cq_pkt->addr = cpu_to_le64(cq_addr);
5552 
5553 	cq_pkt++;
5554 
5555 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5556 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5557 	cq_pkt->ctl = cpu_to_le32(tmp);
5558 	cq_pkt->value = cpu_to_le32(1);
5559 	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5560 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5561 }
5562 
/*
 * gaudi_update_eq_ci() - write a new value to the mmCPU_IF_EQ_RD_OFFS register.
 * @hdev: habanalabs device structure.
 * @val: value to write (presumably the event queue consumer index, going by
 *       the function name - confirm against the caller).
 */
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}
5567 
5568 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5569 					u32 size, u64 val)
5570 {
5571 	struct packet_lin_dma *lin_dma_pkt;
5572 	struct hl_cs_job *job;
5573 	u32 cb_size, ctl, err_cause;
5574 	struct hl_cb *cb;
5575 	int rc;
5576 
5577 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5578 	if (!cb)
5579 		return -EFAULT;
5580 
5581 	lin_dma_pkt = cb->kernel_address;
5582 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5583 	cb_size = sizeof(*lin_dma_pkt);
5584 
5585 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5586 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5587 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5588 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5589 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5590 
5591 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5592 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5593 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5594 	lin_dma_pkt->tsize = cpu_to_le32(size);
5595 
5596 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5597 	if (!job) {
5598 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5599 		rc = -ENOMEM;
5600 		goto release_cb;
5601 	}
5602 
5603 	/* Verify DMA is OK */
5604 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5605 	if (err_cause && !hdev->init_done) {
5606 		dev_dbg(hdev->dev,
5607 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5608 			err_cause);
5609 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5610 	}
5611 
5612 	job->id = 0;
5613 	job->user_cb = cb;
5614 	atomic_inc(&job->user_cb->cs_cnt);
5615 	job->user_cb_size = cb_size;
5616 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5617 	job->patched_cb = job->user_cb;
5618 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5619 
5620 	hl_debugfs_add_job(hdev, job);
5621 
5622 	rc = gaudi_send_job_on_qman0(hdev, job);
5623 	hl_debugfs_remove_job(hdev, job);
5624 	kfree(job);
5625 	atomic_dec(&cb->cs_cnt);
5626 
5627 	/* Verify DMA is OK */
5628 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5629 	if (err_cause) {
5630 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5631 		rc = -EIO;
5632 		if (!hdev->init_done) {
5633 			dev_dbg(hdev->dev,
5634 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5635 				err_cause);
5636 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5637 		}
5638 	}
5639 
5640 release_cb:
5641 	hl_cb_put(cb);
5642 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5643 
5644 	return rc;
5645 }
5646 
5647 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5648 					u32 num_regs, u32 val)
5649 {
5650 	struct packet_msg_long *pkt;
5651 	struct hl_cs_job *job;
5652 	u32 cb_size, ctl;
5653 	struct hl_cb *cb;
5654 	int i, rc;
5655 
5656 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5657 
5658 	if (cb_size > SZ_2M) {
5659 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5660 		return -ENOMEM;
5661 	}
5662 
5663 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5664 	if (!cb)
5665 		return -EFAULT;
5666 
5667 	pkt = cb->kernel_address;
5668 
5669 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5670 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5671 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5672 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5673 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5674 
5675 	for (i = 0; i < num_regs ; i++, pkt++) {
5676 		pkt->ctl = cpu_to_le32(ctl);
5677 		pkt->value = cpu_to_le32(val);
5678 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5679 	}
5680 
5681 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5682 	if (!job) {
5683 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5684 		rc = -ENOMEM;
5685 		goto release_cb;
5686 	}
5687 
5688 	job->id = 0;
5689 	job->user_cb = cb;
5690 	atomic_inc(&job->user_cb->cs_cnt);
5691 	job->user_cb_size = cb_size;
5692 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5693 	job->patched_cb = job->user_cb;
5694 	job->job_cb_size = cb_size;
5695 
5696 	hl_debugfs_add_job(hdev, job);
5697 
5698 	rc = gaudi_send_job_on_qman0(hdev, job);
5699 	hl_debugfs_remove_job(hdev, job);
5700 	kfree(job);
5701 	atomic_dec(&cb->cs_cnt);
5702 
5703 release_cb:
5704 	hl_cb_put(cb);
5705 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5706 
5707 	return rc;
5708 }
5709 
5710 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5711 {
5712 	u64 base_addr;
5713 	u32 num_regs;
5714 	int rc;
5715 
5716 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5717 	num_regs = NUM_OF_SOB_IN_BLOCK;
5718 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5719 	if (rc) {
5720 		dev_err(hdev->dev, "failed resetting SM registers");
5721 		return -ENOMEM;
5722 	}
5723 
5724 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5725 	num_regs = NUM_OF_SOB_IN_BLOCK;
5726 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5727 	if (rc) {
5728 		dev_err(hdev->dev, "failed resetting SM registers");
5729 		return -ENOMEM;
5730 	}
5731 
5732 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5733 	num_regs = NUM_OF_SOB_IN_BLOCK;
5734 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5735 	if (rc) {
5736 		dev_err(hdev->dev, "failed resetting SM registers");
5737 		return -ENOMEM;
5738 	}
5739 
5740 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5741 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5742 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5743 	if (rc) {
5744 		dev_err(hdev->dev, "failed resetting SM registers");
5745 		return -ENOMEM;
5746 	}
5747 
5748 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5749 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5750 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5751 	if (rc) {
5752 		dev_err(hdev->dev, "failed resetting SM registers");
5753 		return -ENOMEM;
5754 	}
5755 
5756 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5757 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5758 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5759 	if (rc) {
5760 		dev_err(hdev->dev, "failed resetting SM registers");
5761 		return -ENOMEM;
5762 	}
5763 
5764 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5765 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5766 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5767 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5768 	if (rc) {
5769 		dev_err(hdev->dev, "failed resetting SM registers");
5770 		return -ENOMEM;
5771 	}
5772 
5773 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5774 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5775 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5776 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5777 	if (rc) {
5778 		dev_err(hdev->dev, "failed resetting SM registers");
5779 		return -ENOMEM;
5780 	}
5781 
5782 	return 0;
5783 }
5784 
5785 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5786 {
5787 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5788 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5789 	int i;
5790 
5791 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5792 		u64 sob_addr = CFG_BASE +
5793 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5794 				(i * sob_delta);
5795 		u32 dma_offset = i * DMA_CORE_OFFSET;
5796 
5797 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5798 				lower_32_bits(sob_addr));
5799 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5800 				upper_32_bits(sob_addr));
5801 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5802 
5803 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5804 		 * modified by the user for SRAM reduction
5805 		 */
5806 		if (i > 1)
5807 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5808 								0x00000001);
5809 	}
5810 }
5811 
5812 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5813 {
5814 	u32 qman_offset;
5815 	int i;
5816 
5817 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5818 		qman_offset = i * DMA_QMAN_OFFSET;
5819 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5820 	}
5821 
5822 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5823 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5824 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5825 	}
5826 
5827 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5828 		qman_offset = i * TPC_QMAN_OFFSET;
5829 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5830 	}
5831 
5832 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5833 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5834 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5835 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5836 	}
5837 }
5838 
/*
 * gaudi_restore_user_registers() - restore the sync manager, DMA and QMAN
 * registers.
 * @hdev: habanalabs device structure.
 *
 * The DMA and QMAN registers are only restored when the sync manager
 * registers were restored successfully.
 *
 * Return: 0 on success, otherwise the error from gaudi_restore_sm_registers().
 */
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc = gaudi_restore_sm_registers(hdev);

	if (!rc) {
		gaudi_restore_dma_registers(hdev);
		gaudi_restore_qm_registers(hdev);
	}

	return rc;
}
5852 
/*
 * gaudi_context_switch() - context switch hook; nothing to do here.
 * @hdev: habanalabs device structure (unused).
 * @asid: address space ID (unused).
 *
 * Return: always 0.
 */
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}
5857 
5858 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5859 {
5860 	u32 size = hdev->asic_prop.mmu_pgt_size +
5861 			hdev->asic_prop.mmu_cache_mng_size;
5862 	struct gaudi_device *gaudi = hdev->asic_specific;
5863 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5864 
5865 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5866 		return 0;
5867 
5868 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5869 }
5870 
/*
 * gaudi_restore_phase_topology() - intentionally empty hook.
 * @hdev: habanalabs device structure (unused).
 */
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{

}
5875 
5876 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5877 					u32 size_to_dma, dma_addr_t dma_addr)
5878 {
5879 	u32 err_cause, val;
5880 	u64 dma_offset;
5881 	int rc;
5882 
5883 	dma_offset = dma_id * DMA_CORE_OFFSET;
5884 
5885 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5886 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5887 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5888 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5889 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5890 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5891 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5892 
5893 	rc = hl_poll_timeout(
5894 		hdev,
5895 		mmDMA0_CORE_STS0 + dma_offset,
5896 		val,
5897 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5898 		0,
5899 		1000000);
5900 
5901 	if (rc) {
5902 		dev_err(hdev->dev,
5903 			"DMA %d timed-out during reading of 0x%llx\n",
5904 			dma_id, addr);
5905 		return -EIO;
5906 	}
5907 
5908 	/* Verify DMA is OK */
5909 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5910 	if (err_cause) {
5911 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5912 		dev_dbg(hdev->dev,
5913 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5914 			err_cause);
5915 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5916 
5917 		return -EIO;
5918 	}
5919 
5920 	return 0;
5921 }
5922 
/*
 * gaudi_debugfs_read_dma() - read device memory into a host buffer through a
 * PCI DMA engine.
 * @hdev: habanalabs device structure.
 * @addr: device address to start reading from.
 * @size: number of bytes to read.
 * @blob_addr: host buffer that receives the data; must hold @size bytes.
 *
 * The data is moved in chunks of up to 2MB through a temporary coherent
 * buffer and copied from there into @blob_addr. The whole operation runs
 * under the H/W queues lock. The engine assigned to GAUDI_PCI_DMA_1 is
 * preferred; if it is not idle, the one assigned to GAUDI_PCI_DMA_2 is tried.
 *
 * Return: 0 on success, -ENOMEM if the temporary buffer cannot be allocated,
 *         -EAGAIN if both candidate engines are busy, otherwise the error
 *         returned by gaudi_dma_core_transfer().
 */
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr)
{
	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
	u32 qm_glbl_sts0, qm_cgm_sts;
	u64 dma_offset, qm_offset;
	dma_addr_t dma_addr;
	void *kernel_addr;
	bool is_eng_idle;
	int rc = 0, dma_id;

	/* 2MB bounce buffer: the DMA engine writes here, we memcpy out of it */
	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);

	if (!kernel_addr)
		return -ENOMEM;

	hdev->asic_funcs->hw_queues_lock(hdev);

	/* First choice: the engine assigned to GAUDI_PCI_DMA_1 */
	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
	dma_offset = dma_id * DMA_CORE_OFFSET;
	qm_offset = dma_id * DMA_QMAN_OFFSET;
	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
	/* Usable only if both the QMAN and the DMA core report idle */
	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
		      IS_DMA_IDLE(dma_core_sts0);

	if (!is_eng_idle) {
		/* Fallback: the engine assigned to GAUDI_PCI_DMA_2 */
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_offset = dma_id * DMA_CORE_OFFSET;
		qm_offset = dma_id * DMA_QMAN_OFFSET;
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
			      IS_DMA_IDLE(dma_core_sts0);

		if (!is_eng_idle) {
			/* No register was modified yet, so nothing to restore */
			dev_err_ratelimited(hdev->dev,
				"Can't read via DMA because it is BUSY\n");
			rc = -EAGAIN;
			goto out;
		}
	}

	/*
	 * Save GLBL_CFG1 so it can be restored once we are done, then set the
	 * CP_STOP field (presumably halting the QMAN's CPs while the core is
	 * driven directly - confirm against the H/W spec).
	 */
	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		/* Clear stale errors so they are not blamed on our transfers */
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
	}

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	while (size_left > 0) {

		/* Last (or only) chunk may be shorter than the bounce buffer */
		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
						dma_addr);
		if (rc)
			break;

		memcpy(blob_addr + pos, kernel_addr, size_to_dma);

		/* Exit here on the final chunk so size_left never underflows */
		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Put back the QMAN configuration saved before the transfer */
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);

	return rc;
}
6027 
6028 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6029 {
6030 	struct gaudi_device *gaudi = hdev->asic_specific;
6031 
6032 	if (hdev->reset_info.hard_reset_pending)
6033 		return U64_MAX;
6034 
6035 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6036 			(addr - gaudi->hbm_bar_cur_addr));
6037 }
6038 
6039 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6040 {
6041 	struct gaudi_device *gaudi = hdev->asic_specific;
6042 
6043 	if (hdev->reset_info.hard_reset_pending)
6044 		return;
6045 
6046 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6047 			(addr - gaudi->hbm_bar_cur_addr));
6048 }
6049 
/*
 * gaudi_mmu_prepare_reg() - program an ASID into a single register.
 * @hdev: habanalabs device structure.
 * @reg: register to update.
 * @asid: address space ID to OR into the register. It is not masked here, so
 *        the caller must make sure it fits in the cleared field.
 *
 * Clears the low 11 bits of @reg and then ORs @asid into it, leaving the
 * remaining bits of the register as they were.
 */
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
6056 
6057 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6058 {
6059 	struct gaudi_device *gaudi = hdev->asic_specific;
6060 
6061 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6062 		return;
6063 
6064 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6065 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6066 		return;
6067 	}
6068 
6069 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6070 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6071 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6072 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6073 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6074 
6075 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6076 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6077 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6078 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6079 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6080 
6081 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6082 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6083 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6084 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6085 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6086 
6087 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6088 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6089 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6090 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6091 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6092 
6093 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6094 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6095 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6096 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6097 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6098 
6099 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6100 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6101 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6102 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6103 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6104 
6105 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6106 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6107 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6108 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6109 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6110 
6111 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6112 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6113 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6114 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6115 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6116 
6117 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6118 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6119 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6120 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6121 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6122 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6123 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6124 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6125 
6126 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6127 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6128 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6129 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6130 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6131 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6132 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6133 
6134 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6135 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6136 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6137 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6138 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6139 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6140 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6141 
6142 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6143 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6144 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6145 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6146 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6147 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6148 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6149 
6150 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6151 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6152 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6153 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6154 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6155 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6156 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6157 
6158 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6159 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6160 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6161 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6162 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6163 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6164 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6165 
6166 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6167 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6168 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6169 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6170 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6171 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6172 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6173 
6174 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6175 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6176 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6177 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6178 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6179 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6180 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6181 
6182 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6183 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6184 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6185 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6186 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6187 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6188 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6189 
6190 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6191 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6192 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6193 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6194 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6195 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6196 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6197 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6198 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6199 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6200 
6201 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6202 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6203 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6204 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6205 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6206 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6207 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6208 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6209 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6210 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6211 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6212 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6213 
6214 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6215 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6216 				asid);
6217 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6218 				asid);
6219 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6220 				asid);
6221 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6222 				asid);
6223 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6224 				asid);
6225 	}
6226 
6227 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6228 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6229 				asid);
6230 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6231 				asid);
6232 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6233 				asid);
6234 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6235 				asid);
6236 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6237 				asid);
6238 	}
6239 
6240 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6241 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6242 				asid);
6243 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6244 				asid);
6245 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6246 				asid);
6247 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6248 				asid);
6249 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6250 				asid);
6251 	}
6252 
6253 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6254 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6255 				asid);
6256 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6257 				asid);
6258 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6259 				asid);
6260 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6261 				asid);
6262 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6263 				asid);
6264 	}
6265 
6266 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6267 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6268 				asid);
6269 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6270 				asid);
6271 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6272 				asid);
6273 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6274 				asid);
6275 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6276 				asid);
6277 	}
6278 
6279 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6280 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6281 				asid);
6282 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6283 				asid);
6284 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6285 				asid);
6286 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6287 				asid);
6288 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6289 				asid);
6290 	}
6291 
6292 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6293 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6294 				asid);
6295 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6296 				asid);
6297 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6298 				asid);
6299 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6300 				asid);
6301 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6302 				asid);
6303 	}
6304 
6305 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6306 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6307 				asid);
6308 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6309 				asid);
6310 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6311 				asid);
6312 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6313 				asid);
6314 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6315 				asid);
6316 	}
6317 
6318 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6319 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6320 				asid);
6321 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6322 				asid);
6323 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6324 				asid);
6325 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6326 				asid);
6327 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6328 				asid);
6329 	}
6330 
6331 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6332 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6333 				asid);
6334 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6335 				asid);
6336 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6337 				asid);
6338 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6339 				asid);
6340 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6341 				asid);
6342 	}
6343 
6344 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6345 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6346 }
6347 
6348 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6349 		struct hl_cs_job *job)
6350 {
6351 	struct packet_msg_prot *fence_pkt;
6352 	u32 *fence_ptr;
6353 	dma_addr_t fence_dma_addr;
6354 	struct hl_cb *cb;
6355 	u32 tmp, timeout, dma_offset;
6356 	int rc;
6357 
6358 	if (hdev->pldm)
6359 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6360 	else
6361 		timeout = HL_DEVICE_TIMEOUT_USEC;
6362 
6363 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6364 	if (!fence_ptr) {
6365 		dev_err(hdev->dev,
6366 			"Failed to allocate fence memory for QMAN0\n");
6367 		return -ENOMEM;
6368 	}
6369 
6370 	cb = job->patched_cb;
6371 
6372 	fence_pkt = cb->kernel_address +
6373 			job->job_cb_size - sizeof(struct packet_msg_prot);
6374 
6375 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6376 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6377 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6378 
6379 	fence_pkt->ctl = cpu_to_le32(tmp);
6380 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6381 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6382 
6383 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6384 
6385 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6386 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6387 
6388 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6389 					job->job_cb_size, cb->bus_address);
6390 	if (rc) {
6391 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6392 		goto free_fence_ptr;
6393 	}
6394 
6395 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6396 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6397 				timeout, true);
6398 
6399 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6400 
6401 	if (rc == -ETIMEDOUT) {
6402 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6403 		goto free_fence_ptr;
6404 	}
6405 
6406 free_fence_ptr:
6407 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6408 
6409 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6410 	return rc;
6411 }
6412 
6413 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6414 {
6415 	if (event_type >= GAUDI_EVENT_SIZE)
6416 		goto event_not_supported;
6417 
6418 	if (!gaudi_irq_map_table[event_type].valid)
6419 		goto event_not_supported;
6420 
6421 	snprintf(desc, size, gaudi_irq_map_table[event_type].name);
6422 
6423 	return;
6424 
6425 event_not_supported:
6426 	snprintf(desc, size, "N/A");
6427 }
6428 
6429 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6430 							bool is_write, u16 *engine_id_1,
6431 							u16 *engine_id_2)
6432 {
6433 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6434 
6435 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6436 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6437 
6438 	switch (x_y) {
6439 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6440 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6441 		dma_id[0] = 0;
6442 		dma_id[1] = 2;
6443 		break;
6444 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6445 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6446 		dma_id[0] = 1;
6447 		dma_id[1] = 3;
6448 		break;
6449 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6450 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6451 		dma_id[0] = 4;
6452 		dma_id[1] = 6;
6453 		break;
6454 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6455 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6456 		dma_id[0] = 5;
6457 		dma_id[1] = 7;
6458 		break;
6459 	default:
6460 		goto unknown_initiator;
6461 	}
6462 
6463 	for (i = 0 ; i < 2 ; i++) {
6464 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6465 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6466 	}
6467 
6468 	switch (x_y) {
6469 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6470 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6471 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6472 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6473 			return "DMA0";
6474 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6475 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6476 			return "DMA2";
6477 		} else {
6478 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6479 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6480 			return "DMA0 or DMA2";
6481 		}
6482 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6483 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6484 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6485 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6486 			return "DMA1";
6487 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6488 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6489 			return "DMA3";
6490 		} else {
6491 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6492 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6493 			return "DMA1 or DMA3";
6494 		}
6495 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6496 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6497 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6498 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6499 			return "DMA4";
6500 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6501 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6502 			return "DMA6";
6503 		} else {
6504 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6505 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6506 			return "DMA4 or DMA6";
6507 		}
6508 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6509 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6510 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6511 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6512 			return "DMA5";
6513 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6514 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6515 			return "DMA7";
6516 		} else {
6517 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6518 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6519 			return "DMA5 or DMA7";
6520 		}
6521 	}
6522 
6523 unknown_initiator:
6524 	return "unknown initiator";
6525 }
6526 
6527 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6528 							u16 *engine_id_1, u16 *engine_id_2)
6529 {
6530 	u32 val, x_y, axi_id;
6531 
6532 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6533 				RREG32(mmMMU_UP_RAZWI_READ_ID);
6534 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6535 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6536 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6537 			RAZWI_INITIATOR_AXI_ID_SHIFT);
6538 
6539 	switch (x_y) {
6540 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6541 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6542 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6543 			return "TPC0";
6544 		}
6545 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6546 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6547 			return "NIC0";
6548 		}
6549 		break;
6550 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6551 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6552 		return "TPC1";
6553 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6554 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6555 		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6556 		return "MME0";
6557 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6558 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6559 		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6560 		return "MME1";
6561 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6562 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6563 		return "TPC2";
6564 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6565 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6566 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6567 			return "TPC3";
6568 		}
6569 		/* PCI, CPU or PSOC does not have engine id*/
6570 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6571 			return "PCI";
6572 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6573 			return "CPU";
6574 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6575 			return "PSOC";
6576 		break;
6577 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6578 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6579 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6580 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6581 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6582 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6583 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6584 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6585 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6586 				engine_id_1, engine_id_2);
6587 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6588 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6589 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6590 			return "TPC4";
6591 		}
6592 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6593 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6594 			return "NIC1";
6595 		}
6596 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6597 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6598 			return "NIC2";
6599 		}
6600 		break;
6601 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6602 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6603 		return "TPC5";
6604 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6605 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6606 		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6607 		return "MME2";
6608 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6609 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6610 		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6611 		return "MME3";
6612 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6613 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6614 		return "TPC6";
6615 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6616 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6617 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6618 			return "TPC7";
6619 		}
6620 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6621 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6622 			return "NIC4";
6623 		}
6624 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6625 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6626 			return "NIC5";
6627 		}
6628 		break;
6629 	default:
6630 		break;
6631 	}
6632 
6633 	dev_err(hdev->dev,
6634 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6635 		val,
6636 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6637 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6638 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6639 			RAZWI_INITIATOR_AXI_ID_MASK);
6640 
6641 	return "unknown initiator";
6642 }
6643 
6644 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6645 						u16 *engine_id_2, bool *is_read, bool *is_write)
6646 {
6647 
6648 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6649 		dev_err_ratelimited(hdev->dev,
6650 			"RAZWI event caused by illegal write of %s\n",
6651 			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6652 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6653 		*is_write = true;
6654 	}
6655 
6656 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6657 		dev_err_ratelimited(hdev->dev,
6658 			"RAZWI event caused by illegal read of %s\n",
6659 			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6660 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6661 		*is_read = true;
6662 	}
6663 }
6664 
6665 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6666 {
6667 	struct gaudi_device *gaudi = hdev->asic_specific;
6668 	u32 val;
6669 
6670 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6671 		return;
6672 
6673 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6674 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6675 		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6676 		*addr <<= 32;
6677 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6678 
6679 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6680 		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6681 
6682 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6683 	}
6684 
6685 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6686 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6687 		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6688 		*addr <<= 32;
6689 		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6690 
6691 		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6692 
6693 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6694 	}
6695 }
6696 
6697 /*
6698  *  +-------------------+------------------------------------------------------+
6699  *  | Configuration Reg |                     Description                      |
6700  *  |      Address      |                                                      |
6701  *  +-------------------+------------------------------------------------------+
6702  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6703  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6704  *  |                   |0xF34 memory wrappers 63:32                           |
6705  *  |                   |0xF38 memory wrappers 95:64                           |
6706  *  |                   |0xF3C memory wrappers 127:96                          |
6707  *  +-------------------+------------------------------------------------------+
6708  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6709  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6710  *  |                   |0xF44 memory wrappers 63:32                           |
6711  *  |                   |0xF48 memory wrappers 95:64                           |
6712  *  |                   |0xF4C memory wrappers 127:96                          |
6713  *  +-------------------+------------------------------------------------------+
6714  */
6715 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6716 		struct ecc_info_extract_params *params, u64 *ecc_address,
6717 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6718 {
6719 	u32 i, num_mem_regs, reg, err_bit;
6720 	u64 err_addr, err_word = 0;
6721 
6722 	num_mem_regs = params->num_memories / 32 +
6723 			((params->num_memories % 32) ? 1 : 0);
6724 
6725 	if (params->block_address >= CFG_BASE)
6726 		params->block_address -= CFG_BASE;
6727 
6728 	if (params->derr)
6729 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6730 	else
6731 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6732 
6733 	/* Set invalid wrapper index */
6734 	*memory_wrapper_idx = 0xFF;
6735 
6736 	/* Iterate through memory wrappers, a single bit must be set */
6737 	for (i = 0 ; i < num_mem_regs ; i++) {
6738 		err_addr += i * 4;
6739 		err_word = RREG32(err_addr);
6740 		if (err_word) {
6741 			err_bit = __ffs(err_word);
6742 			*memory_wrapper_idx = err_bit + (32 * i);
6743 			break;
6744 		}
6745 	}
6746 
6747 	if (*memory_wrapper_idx == 0xFF) {
6748 		dev_err(hdev->dev, "ECC error information cannot be found\n");
6749 		return -EINVAL;
6750 	}
6751 
6752 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6753 			*memory_wrapper_idx);
6754 
6755 	*ecc_address =
6756 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6757 	*ecc_syndrom =
6758 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6759 
6760 	/* Clear error indication */
6761 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6762 	if (params->derr)
6763 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6764 	else
6765 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6766 
6767 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6768 
6769 	return 0;
6770 }
6771 
/*
 * gaudi_queue_idx_dec - step a queue index (pi/ci) back by one, with wrap
 *
 * @idx: the current pi/ci value
 * @q_len: the queue length (power of 2)
 *
 * @return the index preceding @idx in the cyclic queue
 */
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	const u32 wrap_mask = q_len - 1;

	/*
	 * Going back one slot modulo q_len is the same as going forward
	 * q_len - 1 slots; masking keeps the result in [0, q_len - 1].
	 */
	return (idx + wrap_mask) & wrap_mask;
}
6791 
6792 /**
6793  * gaudi_handle_sw_config_stream_data - print SW config stream data
6794  *
6795  * @hdev: pointer to the habanalabs device structure
6796  * @stream: the QMAN's stream
6797  * @qman_base: base address of QMAN registers block
6798  * @event_mask: mask of the last events occurred
6799  */
6800 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6801 						u64 qman_base, u64 event_mask)
6802 {
6803 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6804 	u32 cq_ptr_lo_off, size;
6805 
6806 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6807 
6808 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6809 						stream * cq_ptr_lo_off;
6810 	cq_ptr_hi = cq_ptr_lo +
6811 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6812 	cq_tsize = cq_ptr_lo +
6813 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6814 
6815 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6816 	size = RREG32(cq_tsize);
6817 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6818 							stream, cq_ptr, size);
6819 
6820 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6821 		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6822 		hdev->captured_err_info.undef_opcode.cq_size = size;
6823 		hdev->captured_err_info.undef_opcode.stream_id = stream;
6824 	}
6825 }
6826 
6827 /**
6828  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6829  *
6830  * @hdev: pointer to the habanalabs device structure
6831  * @qid_base: first QID of the QMAN (out of 4 streams)
6832  * @stream: the QMAN's stream
6833  * @qman_base: base address of QMAN registers block
6834  * @event_mask: mask of the last events occurred
6835  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6836  */
6837 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6838 						u32 stream, u64 qman_base,
6839 						u64 event_mask,
6840 						bool pr_sw_conf)
6841 {
6842 	u32 ci, qm_ci_stream_off, queue_len;
6843 	struct hl_hw_queue *q;
6844 	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6845 	int i;
6846 
6847 	q = &hdev->kernel_queues[qid_base + stream];
6848 
6849 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6850 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6851 						stream * qm_ci_stream_off;
6852 
6853 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6854 					q->int_queue_len : HL_QUEUE_LENGTH;
6855 
6856 	hdev->asic_funcs->hw_queues_lock(hdev);
6857 
6858 	if (pr_sw_conf)
6859 		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6860 
6861 	ci = RREG32(pq_ci);
6862 
6863 	/* we should start printing form ci -1 */
6864 	ci = gaudi_queue_idx_dec(ci, queue_len);
6865 	memset(addr, 0, sizeof(addr));
6866 
6867 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6868 		struct hl_bd *bd;
6869 		u32 len;
6870 
6871 		bd = q->kernel_address;
6872 		bd += ci;
6873 
6874 		len = le32_to_cpu(bd->len);
6875 		/* len 0 means uninitialized entry- break */
6876 		if (!len)
6877 			break;
6878 
6879 		addr[i] = le64_to_cpu(bd->ptr);
6880 
6881 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6882 							stream, ci, addr[i], len);
6883 
6884 		/* get previous ci, wrap if needed */
6885 		ci = gaudi_queue_idx_dec(ci, queue_len);
6886 	}
6887 
6888 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6889 		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6890 		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6891 
6892 		if (arr_idx == 0) {
6893 			undef_opcode->timestamp = ktime_get();
6894 			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6895 		}
6896 
6897 		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6898 		undef_opcode->cb_addr_streams_len++;
6899 	}
6900 
6901 	hdev->asic_funcs->hw_queues_unlock(hdev);
6902 }
6903 
6904 /**
6905  * handle_qman_data_on_err - extract QMAN data on error
6906  *
6907  * @hdev: pointer to the habanalabs device structure
6908  * @qid_base: first QID of the QMAN (out of 4 streams)
6909  * @stream: the QMAN's stream
6910  * @qman_base: base address of QMAN registers block
6911  * @event_mask: mask of the last events occurred
6912  *
6913  * This function attempt to exatract as much data as possible on QMAN error.
6914  * On upper CP print the SW config stream data and last 8 PQEs.
6915  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6916  */
6917 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6918 				   u32 stream, u64 qman_base, u64 event_mask)
6919 {
6920 	u32 i;
6921 
6922 	if (stream != QMAN_STREAMS) {
6923 		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6924 			qman_base, event_mask, true);
6925 		return;
6926 	}
6927 
6928 	/* handle Lower-CP */
6929 	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6930 
6931 	for (i = 0; i < QMAN_STREAMS; i++)
6932 		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6933 			qman_base, event_mask, false);
6934 }
6935 
6936 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6937 					  const char *qm_name,
6938 					  u64 qman_base,
6939 					  u32 qid_base,
6940 					  u64 *event_mask)
6941 {
6942 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6943 	u64 glbl_sts_addr, arb_err_addr;
6944 	char reg_desc[32];
6945 
6946 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6947 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6948 
6949 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
6950 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6951 		glbl_sts_clr_val = 0;
6952 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6953 
6954 		if (!glbl_sts_val)
6955 			continue;
6956 
6957 		if (i == QMAN_STREAMS)
6958 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6959 		else
6960 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6961 
6962 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6963 			if (glbl_sts_val & BIT(j)) {
6964 				dev_err_ratelimited(hdev->dev,
6965 						"%s %s. err cause: %s\n",
6966 						qm_name, reg_desc,
6967 						gaudi_qman_error_cause[j]);
6968 				glbl_sts_clr_val |= BIT(j);
6969 			}
6970 		}
6971 		/* check for undefined opcode */
6972 		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6973 				hdev->captured_err_info.undef_opcode.write_enable) {
6974 			memset(&hdev->captured_err_info.undef_opcode, 0,
6975 						sizeof(hdev->captured_err_info.undef_opcode));
6976 
6977 			hdev->captured_err_info.undef_opcode.write_enable = false;
6978 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6979 		}
6980 
6981 		/* Write 1 clear errors */
6982 		if (!hdev->stop_on_err)
6983 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6984 		else
6985 			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6986 	}
6987 
6988 	arb_err_val = RREG32(arb_err_addr);
6989 
6990 	if (!arb_err_val)
6991 		return;
6992 
6993 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6994 		if (arb_err_val & BIT(j)) {
6995 			dev_err_ratelimited(hdev->dev,
6996 					"%s ARB_ERR. err cause: %s\n",
6997 					qm_name,
6998 					gaudi_qman_arb_error_cause[j]);
6999 		}
7000 	}
7001 }
7002 
7003 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7004 		struct hl_eq_sm_sei_data *sei_data)
7005 {
7006 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7007 
7008 	/* Flip the bits as the enum is ordered in the opposite way */
7009 	index = (index ^ 0x3) & 0x3;
7010 
7011 	switch (sei_data->sei_cause) {
7012 	case SM_SEI_SO_OVERFLOW:
7013 		dev_err_ratelimited(hdev->dev,
7014 			"%s SEI Error: SOB Group %u overflow/underflow",
7015 			gaudi_sync_manager_names[index],
7016 			le32_to_cpu(sei_data->sei_log));
7017 		break;
7018 	case SM_SEI_LBW_4B_UNALIGNED:
7019 		dev_err_ratelimited(hdev->dev,
7020 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7021 			gaudi_sync_manager_names[index],
7022 			le32_to_cpu(sei_data->sei_log));
7023 		break;
7024 	case SM_SEI_AXI_RESPONSE_ERR:
7025 		dev_err_ratelimited(hdev->dev,
7026 			"%s SEI Error: AXI ID %u response error",
7027 			gaudi_sync_manager_names[index],
7028 			le32_to_cpu(sei_data->sei_log));
7029 		break;
7030 	default:
7031 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7032 				le32_to_cpu(sei_data->sei_log));
7033 		break;
7034 	}
7035 }
7036 
7037 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7038 		struct hl_eq_ecc_data *ecc_data)
7039 {
7040 	struct ecc_info_extract_params params;
7041 	u64 ecc_address = 0, ecc_syndrom = 0;
7042 	u8 index, memory_wrapper_idx = 0;
7043 	bool extract_info_from_fw;
7044 	int rc;
7045 
7046 	if (hdev->asic_prop.fw_security_enabled) {
7047 		extract_info_from_fw = true;
7048 		goto extract_ecc_info;
7049 	}
7050 
7051 	switch (event_type) {
7052 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7053 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7054 		extract_info_from_fw = true;
7055 		break;
7056 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7057 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7058 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7059 		params.num_memories = 90;
7060 		params.derr = false;
7061 		extract_info_from_fw = false;
7062 		break;
7063 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7064 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7065 		params.block_address =
7066 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7067 		params.num_memories = 90;
7068 		params.derr = true;
7069 		extract_info_from_fw = false;
7070 		break;
7071 	case GAUDI_EVENT_MME0_ACC_SERR:
7072 	case GAUDI_EVENT_MME1_ACC_SERR:
7073 	case GAUDI_EVENT_MME2_ACC_SERR:
7074 	case GAUDI_EVENT_MME3_ACC_SERR:
7075 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7076 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7077 		params.num_memories = 128;
7078 		params.derr = false;
7079 		extract_info_from_fw = false;
7080 		break;
7081 	case GAUDI_EVENT_MME0_ACC_DERR:
7082 	case GAUDI_EVENT_MME1_ACC_DERR:
7083 	case GAUDI_EVENT_MME2_ACC_DERR:
7084 	case GAUDI_EVENT_MME3_ACC_DERR:
7085 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7086 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7087 		params.num_memories = 128;
7088 		params.derr = true;
7089 		extract_info_from_fw = false;
7090 		break;
7091 	case GAUDI_EVENT_MME0_SBAB_SERR:
7092 	case GAUDI_EVENT_MME1_SBAB_SERR:
7093 	case GAUDI_EVENT_MME2_SBAB_SERR:
7094 	case GAUDI_EVENT_MME3_SBAB_SERR:
7095 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7096 		params.block_address =
7097 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7098 		params.num_memories = 33;
7099 		params.derr = false;
7100 		extract_info_from_fw = false;
7101 		break;
7102 	case GAUDI_EVENT_MME0_SBAB_DERR:
7103 	case GAUDI_EVENT_MME1_SBAB_DERR:
7104 	case GAUDI_EVENT_MME2_SBAB_DERR:
7105 	case GAUDI_EVENT_MME3_SBAB_DERR:
7106 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7107 		params.block_address =
7108 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7109 		params.num_memories = 33;
7110 		params.derr = true;
7111 		extract_info_from_fw = false;
7112 		break;
7113 	default:
7114 		return;
7115 	}
7116 
7117 extract_ecc_info:
7118 	if (extract_info_from_fw) {
7119 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7120 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7121 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7122 	} else {
7123 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7124 				&ecc_syndrom, &memory_wrapper_idx);
7125 		if (rc)
7126 			return;
7127 	}
7128 
7129 	dev_err(hdev->dev,
7130 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7131 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7132 }
7133 
7134 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7135 {
7136 	u64 qman_base;
7137 	char desc[32];
7138 	u32 qid_base;
7139 	u8 index;
7140 
7141 	switch (event_type) {
7142 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7143 		index = event_type - GAUDI_EVENT_TPC0_QM;
7144 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7145 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7146 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7147 		break;
7148 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7149 		if (event_type == GAUDI_EVENT_MME0_QM) {
7150 			index = 0;
7151 			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7152 		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7153 			index = 2;
7154 			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7155 		}
7156 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7157 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7158 		break;
7159 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7160 		index = event_type - GAUDI_EVENT_DMA0_QM;
7161 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7162 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7163 		if (index > 1)
7164 			qid_base++;
7165 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7166 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7167 		break;
7168 	case GAUDI_EVENT_NIC0_QM0:
7169 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7170 		qman_base = mmNIC0_QM0_BASE;
7171 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7172 		break;
7173 	case GAUDI_EVENT_NIC0_QM1:
7174 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7175 		qman_base = mmNIC0_QM1_BASE;
7176 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7177 		break;
7178 	case GAUDI_EVENT_NIC1_QM0:
7179 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7180 		qman_base = mmNIC1_QM0_BASE;
7181 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7182 		break;
7183 	case GAUDI_EVENT_NIC1_QM1:
7184 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7185 		qman_base = mmNIC1_QM1_BASE;
7186 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7187 		break;
7188 	case GAUDI_EVENT_NIC2_QM0:
7189 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7190 		qman_base = mmNIC2_QM0_BASE;
7191 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7192 		break;
7193 	case GAUDI_EVENT_NIC2_QM1:
7194 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7195 		qman_base = mmNIC2_QM1_BASE;
7196 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7197 		break;
7198 	case GAUDI_EVENT_NIC3_QM0:
7199 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7200 		qman_base = mmNIC3_QM0_BASE;
7201 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7202 		break;
7203 	case GAUDI_EVENT_NIC3_QM1:
7204 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7205 		qman_base = mmNIC3_QM1_BASE;
7206 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7207 		break;
7208 	case GAUDI_EVENT_NIC4_QM0:
7209 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7210 		qman_base = mmNIC4_QM0_BASE;
7211 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7212 		break;
7213 	case GAUDI_EVENT_NIC4_QM1:
7214 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7215 		qman_base = mmNIC4_QM1_BASE;
7216 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7217 		break;
7218 	default:
7219 		return;
7220 	}
7221 
7222 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7223 }
7224 
7225 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7226 					bool check_razwi, u64 *event_mask)
7227 {
7228 	bool is_read = false, is_write = false;
7229 	u16 engine_id[2], num_of_razwi_eng = 0;
7230 	char desc[64] = "";
7231 	u64 razwi_addr = 0;
7232 	u8 razwi_flags = 0;
7233 
7234 	/*
7235 	 * Init engine id by default as not valid and only if razwi initiated from engine with
7236 	 * engine id it will get valid value.
7237 	 */
7238 	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7239 	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7240 
7241 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7242 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7243 		event_type, desc);
7244 
7245 	if (check_razwi) {
7246 		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7247 						&is_write);
7248 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7249 
7250 		if (is_read)
7251 			razwi_flags |= HL_RAZWI_READ;
7252 		if (is_write)
7253 			razwi_flags |= HL_RAZWI_WRITE;
7254 
7255 		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7256 			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7257 				num_of_razwi_eng = 2;
7258 			else
7259 				num_of_razwi_eng = 1;
7260 		}
7261 
7262 		if (razwi_flags)
7263 			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7264 					razwi_flags, event_mask);
7265 	}
7266 }
7267 
7268 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7269 					struct cpucp_pkt_sync_err *sync_err)
7270 {
7271 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7272 
7273 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7274 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7275 }
7276 
7277 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7278 					struct hl_eq_fw_alive *fw_alive)
7279 {
7280 	dev_err(hdev->dev,
7281 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7282 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7283 		le32_to_cpu(fw_alive->process_id),
7284 		le32_to_cpu(fw_alive->thread_id),
7285 		le64_to_cpu(fw_alive->uptime_seconds));
7286 }
7287 
7288 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7289 						void *data)
7290 {
7291 	char desc[64] = "", *type;
7292 	struct eq_nic_sei_event *eq_nic_sei = data;
7293 	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7294 
7295 	switch (eq_nic_sei->axi_error_cause) {
7296 	case RXB:
7297 		type = "RXB";
7298 		break;
7299 	case RXE:
7300 		type = "RXE";
7301 		break;
7302 	case TXS:
7303 		type = "TXS";
7304 		break;
7305 	case TXE:
7306 		type = "TXE";
7307 		break;
7308 	case QPC_RESP:
7309 		type = "QPC_RESP";
7310 		break;
7311 	case NON_AXI_ERR:
7312 		type = "NON_AXI_ERR";
7313 		break;
7314 	case TMR:
7315 		type = "TMR";
7316 		break;
7317 	default:
7318 		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7319 			eq_nic_sei->axi_error_cause);
7320 		type = "N/A";
7321 		break;
7322 	}
7323 
7324 	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7325 			eq_nic_sei->id);
7326 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7327 		event_type, desc);
7328 }
7329 
7330 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7331 {
7332 	/* GAUDI doesn't support any reset except hard-reset */
7333 	return -EPERM;
7334 }
7335 
7336 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7337 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7338 {
7339 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7340 	int rc = 0;
7341 
7342 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7343 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7344 		if (!hbm_ecc_data) {
7345 			dev_err(hdev->dev, "No FW ECC data");
7346 			return 0;
7347 		}
7348 
7349 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7350 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7351 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7352 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7353 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7354 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7355 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7356 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7357 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7358 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7359 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7360 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7361 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7362 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7363 
7364 		dev_err(hdev->dev,
7365 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7366 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7367 		dev_err(hdev->dev,
7368 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7369 			device, ch, hbm_ecc_data->first_addr, type,
7370 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7371 			hbm_ecc_data->dec_cnt);
7372 		return 0;
7373 	}
7374 
7375 	if (hdev->asic_prop.fw_security_enabled) {
7376 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7377 		return 0;
7378 	}
7379 
7380 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7381 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7382 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7383 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7384 		if (val) {
7385 			rc = -EIO;
7386 			dev_err(hdev->dev,
7387 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7388 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7389 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7390 				(val >> 4) & 0x1);
7391 
7392 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7393 			dev_err(hdev->dev,
7394 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7395 				device, ch * 2,
7396 				RREG32(base + ch * 0x1000 + 0x064),
7397 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7398 				(val2 & 0xFF0000) >> 16,
7399 				(val2 & 0xFF000000) >> 24);
7400 		}
7401 
7402 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7403 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7404 		if (val) {
7405 			rc = -EIO;
7406 			dev_err(hdev->dev,
7407 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7408 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7409 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7410 				(val >> 4) & 0x1);
7411 
7412 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7413 			dev_err(hdev->dev,
7414 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7415 				device, ch * 2 + 1,
7416 				RREG32(base + ch * 0x1000 + 0x074),
7417 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7418 				(val2 & 0xFF0000) >> 16,
7419 				(val2 & 0xFF000000) >> 24);
7420 		}
7421 
7422 		/* Clear interrupts */
7423 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7424 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7425 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7426 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7427 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7428 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7429 	}
7430 
7431 	val  = RREG32(base + 0x8F30);
7432 	val2 = RREG32(base + 0x8F34);
7433 	if (val | val2) {
7434 		rc = -EIO;
7435 		dev_err(hdev->dev,
7436 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7437 			device, val, val2);
7438 	}
7439 	val  = RREG32(base + 0x8F40);
7440 	val2 = RREG32(base + 0x8F44);
7441 	if (val | val2) {
7442 		rc = -EIO;
7443 		dev_err(hdev->dev,
7444 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7445 			device, val, val2);
7446 	}
7447 
7448 	return rc;
7449 }
7450 
7451 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7452 {
7453 	switch (hbm_event_type) {
7454 	case GAUDI_EVENT_HBM0_SPI_0:
7455 	case GAUDI_EVENT_HBM0_SPI_1:
7456 		return 0;
7457 	case GAUDI_EVENT_HBM1_SPI_0:
7458 	case GAUDI_EVENT_HBM1_SPI_1:
7459 		return 1;
7460 	case GAUDI_EVENT_HBM2_SPI_0:
7461 	case GAUDI_EVENT_HBM2_SPI_1:
7462 		return 2;
7463 	case GAUDI_EVENT_HBM3_SPI_0:
7464 	case GAUDI_EVENT_HBM3_SPI_1:
7465 		return 3;
7466 	default:
7467 		break;
7468 	}
7469 
7470 	/* Should never happen */
7471 	return 0;
7472 }
7473 
7474 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7475 					char *interrupt_name)
7476 {
7477 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7478 	bool soft_reset_required = false;
7479 
7480 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7481 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7482 
7483 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7484 		if (tpc_interrupts_cause & BIT(i)) {
7485 			dev_err_ratelimited(hdev->dev,
7486 					"TPC%d_%s interrupt cause: %s\n",
7487 					tpc_id, interrupt_name,
7488 					gaudi_tpc_interrupts_cause[i]);
7489 			/* If this is QM error, we need to soft-reset */
7490 			if (i == 15)
7491 				soft_reset_required = true;
7492 		}
7493 
7494 	/* Clear interrupts */
7495 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7496 
7497 	return soft_reset_required;
7498 }
7499 
7500 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7501 {
7502 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7503 }
7504 
7505 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7506 {
7507 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7508 }
7509 
7510 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7511 {
7512 	ktime_t zero_time = ktime_set(0, 0);
7513 
7514 	mutex_lock(&hdev->clk_throttling.lock);
7515 
7516 	switch (event_type) {
7517 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7518 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7519 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7520 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7521 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7522 		dev_info_ratelimited(hdev->dev,
7523 			"Clock throttling due to power consumption\n");
7524 		break;
7525 
7526 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7527 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7528 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7529 		dev_info_ratelimited(hdev->dev,
7530 			"Power envelop is safe, back to optimal clock\n");
7531 		break;
7532 
7533 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7534 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7535 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7536 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7537 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7538 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7539 		dev_info_ratelimited(hdev->dev,
7540 			"Clock throttling due to overheating\n");
7541 		break;
7542 
7543 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7544 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7545 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7546 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7547 		dev_info_ratelimited(hdev->dev,
7548 			"Thermal envelop is safe, back to optimal clock\n");
7549 		break;
7550 
7551 	default:
7552 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7553 			event_type);
7554 		break;
7555 	}
7556 
7557 	mutex_unlock(&hdev->clk_throttling.lock);
7558 }
7559 
7560 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7561 {
7562 	struct gaudi_device *gaudi = hdev->asic_specific;
7563 	struct hl_info_fw_err_info fw_err_info;
7564 	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7565 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7566 	u32 fw_fatal_err_flag = 0, flags = 0;
7567 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7568 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7569 	bool reset_required, reset_direct = false;
7570 	u8 cause;
7571 	int rc;
7572 
7573 	if (event_type >= GAUDI_EVENT_SIZE) {
7574 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7575 				event_type, GAUDI_EVENT_SIZE - 1);
7576 		return;
7577 	}
7578 
7579 	gaudi->events_stat[event_type]++;
7580 	gaudi->events_stat_aggregate[event_type]++;
7581 
7582 	switch (event_type) {
7583 	case GAUDI_EVENT_PCIE_CORE_DERR:
7584 	case GAUDI_EVENT_PCIE_IF_DERR:
7585 	case GAUDI_EVENT_PCIE_PHY_DERR:
7586 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7587 	case GAUDI_EVENT_MME0_ACC_DERR:
7588 	case GAUDI_EVENT_MME0_SBAB_DERR:
7589 	case GAUDI_EVENT_MME1_ACC_DERR:
7590 	case GAUDI_EVENT_MME1_SBAB_DERR:
7591 	case GAUDI_EVENT_MME2_ACC_DERR:
7592 	case GAUDI_EVENT_MME2_SBAB_DERR:
7593 	case GAUDI_EVENT_MME3_ACC_DERR:
7594 	case GAUDI_EVENT_MME3_SBAB_DERR:
7595 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7596 		fallthrough;
7597 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7598 	case GAUDI_EVENT_PSOC_MEM_DERR:
7599 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7600 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7601 	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7602 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7603 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7604 	case GAUDI_EVENT_MMU_DERR:
7605 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7606 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7607 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7608 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7609 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7610 		goto reset_device;
7611 
7612 	case GAUDI_EVENT_GIC500:
7613 	case GAUDI_EVENT_AXI_ECC:
7614 	case GAUDI_EVENT_L2_RAM_ECC:
7615 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7616 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7617 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7618 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7619 		goto reset_device;
7620 
7621 	case GAUDI_EVENT_HBM0_SPI_0:
7622 	case GAUDI_EVENT_HBM1_SPI_0:
7623 	case GAUDI_EVENT_HBM2_SPI_0:
7624 	case GAUDI_EVENT_HBM3_SPI_0:
7625 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7626 		gaudi_hbm_read_interrupts(hdev,
7627 				gaudi_hbm_event_to_dev(event_type),
7628 				&eq_entry->hbm_ecc_data);
7629 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7630 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7631 		goto reset_device;
7632 
7633 	case GAUDI_EVENT_HBM0_SPI_1:
7634 	case GAUDI_EVENT_HBM1_SPI_1:
7635 	case GAUDI_EVENT_HBM2_SPI_1:
7636 	case GAUDI_EVENT_HBM3_SPI_1:
7637 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7638 		gaudi_hbm_read_interrupts(hdev,
7639 				gaudi_hbm_event_to_dev(event_type),
7640 				&eq_entry->hbm_ecc_data);
7641 		hl_fw_unmask_irq(hdev, event_type);
7642 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7643 		break;
7644 
7645 	case GAUDI_EVENT_TPC0_DEC:
7646 	case GAUDI_EVENT_TPC1_DEC:
7647 	case GAUDI_EVENT_TPC2_DEC:
7648 	case GAUDI_EVENT_TPC3_DEC:
7649 	case GAUDI_EVENT_TPC4_DEC:
7650 	case GAUDI_EVENT_TPC5_DEC:
7651 	case GAUDI_EVENT_TPC6_DEC:
7652 	case GAUDI_EVENT_TPC7_DEC:
7653 		/* In TPC DEC event, notify on TPC assertion. While there isn't
7654 		 * a specific event for assertion yet, the FW generates TPC DEC event.
7655 		 * The SW upper layer will inspect an internal mapped area to indicate
7656 		 * if the event is a TPC Assertion or a "real" TPC DEC.
7657 		 */
7658 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7659 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7660 		reset_required = gaudi_tpc_read_interrupts(hdev,
7661 					tpc_dec_event_to_tpc_id(event_type),
7662 					"AXI_SLV_DEC_Error");
7663 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7664 		if (reset_required) {
7665 			dev_err(hdev->dev, "reset required due to %s\n",
7666 				gaudi_irq_map_table[event_type].name);
7667 
7668 			reset_direct = true;
7669 			goto reset_device;
7670 		} else {
7671 			hl_fw_unmask_irq(hdev, event_type);
7672 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7673 		}
7674 		break;
7675 
7676 	case GAUDI_EVENT_TPC0_KRN_ERR:
7677 	case GAUDI_EVENT_TPC1_KRN_ERR:
7678 	case GAUDI_EVENT_TPC2_KRN_ERR:
7679 	case GAUDI_EVENT_TPC3_KRN_ERR:
7680 	case GAUDI_EVENT_TPC4_KRN_ERR:
7681 	case GAUDI_EVENT_TPC5_KRN_ERR:
7682 	case GAUDI_EVENT_TPC6_KRN_ERR:
7683 	case GAUDI_EVENT_TPC7_KRN_ERR:
7684 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7685 		reset_required = gaudi_tpc_read_interrupts(hdev,
7686 					tpc_krn_event_to_tpc_id(event_type),
7687 					"KRN_ERR");
7688 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7689 		if (reset_required) {
7690 			dev_err(hdev->dev, "reset required due to %s\n",
7691 				gaudi_irq_map_table[event_type].name);
7692 
7693 			reset_direct = true;
7694 			goto reset_device;
7695 		} else {
7696 			hl_fw_unmask_irq(hdev, event_type);
7697 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7698 		}
7699 		break;
7700 
7701 	case GAUDI_EVENT_PCIE_CORE_SERR:
7702 	case GAUDI_EVENT_PCIE_IF_SERR:
7703 	case GAUDI_EVENT_PCIE_PHY_SERR:
7704 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7705 	case GAUDI_EVENT_MME0_ACC_SERR:
7706 	case GAUDI_EVENT_MME0_SBAB_SERR:
7707 	case GAUDI_EVENT_MME1_ACC_SERR:
7708 	case GAUDI_EVENT_MME1_SBAB_SERR:
7709 	case GAUDI_EVENT_MME2_ACC_SERR:
7710 	case GAUDI_EVENT_MME2_SBAB_SERR:
7711 	case GAUDI_EVENT_MME3_ACC_SERR:
7712 	case GAUDI_EVENT_MME3_SBAB_SERR:
7713 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7714 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7715 	case GAUDI_EVENT_PSOC_MEM_SERR:
7716 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7717 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7718 	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7719 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7720 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7721 		fallthrough;
7722 	case GAUDI_EVENT_MMU_SERR:
7723 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7724 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7725 		hl_fw_unmask_irq(hdev, event_type);
7726 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7727 		break;
7728 
7729 	case GAUDI_EVENT_PCIE_DEC:
7730 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7731 	case GAUDI_EVENT_PSOC_AXI_DEC:
7732 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7733 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7734 		hl_fw_unmask_irq(hdev, event_type);
7735 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7736 		break;
7737 
7738 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7739 	case GAUDI_EVENT_MMU_WR_PERM:
7740 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7741 		hl_fw_unmask_irq(hdev, event_type);
7742 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7743 		break;
7744 
7745 	case GAUDI_EVENT_MME0_WBC_RSP:
7746 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7747 	case GAUDI_EVENT_MME1_WBC_RSP:
7748 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7749 	case GAUDI_EVENT_MME2_WBC_RSP:
7750 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7751 	case GAUDI_EVENT_MME3_WBC_RSP:
7752 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7753 	case GAUDI_EVENT_RAZWI_OR_ADC:
7754 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7755 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7756 		fallthrough;
7757 	case GAUDI_EVENT_NIC0_QM0:
7758 	case GAUDI_EVENT_NIC0_QM1:
7759 	case GAUDI_EVENT_NIC1_QM0:
7760 	case GAUDI_EVENT_NIC1_QM1:
7761 	case GAUDI_EVENT_NIC2_QM0:
7762 	case GAUDI_EVENT_NIC2_QM1:
7763 	case GAUDI_EVENT_NIC3_QM0:
7764 	case GAUDI_EVENT_NIC3_QM1:
7765 	case GAUDI_EVENT_NIC4_QM0:
7766 	case GAUDI_EVENT_NIC4_QM1:
7767 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7768 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7769 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7770 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7771 		hl_fw_unmask_irq(hdev, event_type);
7772 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7773 		break;
7774 
7775 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7776 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7777 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7778 		goto reset_device;
7779 
7780 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7781 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7782 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7783 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7784 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7785 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7786 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7787 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7788 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7789 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7790 		hl_fw_unmask_irq(hdev, event_type);
7791 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7792 		break;
7793 
7794 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7795 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7796 		hl_fw_unmask_irq(hdev, event_type);
7797 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7798 		break;
7799 
7800 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7801 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7802 		gaudi_print_sm_sei_info(hdev, event_type,
7803 					&eq_entry->sm_sei_data);
7804 		rc = hl_state_dump(hdev);
7805 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7806 		if (rc)
7807 			dev_err(hdev->dev,
7808 				"Error during system state dump %d\n", rc);
7809 		hl_fw_unmask_irq(hdev, event_type);
7810 		break;
7811 
7812 	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7813 		break;
7814 
7815 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7816 		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7817 		hl_fw_unmask_irq(hdev, event_type);
7818 		break;
7819 
7820 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7821 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7822 		dev_err(hdev->dev,
7823 			"Received high temp H/W interrupt %d (cause %d)\n",
7824 			event_type, cause);
7825 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7826 		break;
7827 
7828 	case GAUDI_EVENT_DEV_RESET_REQ:
7829 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7830 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7831 		goto reset_device;
7832 
7833 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7834 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7835 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7836 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7837 		goto reset_device;
7838 
7839 	case GAUDI_EVENT_FW_ALIVE_S:
7840 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7841 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7842 		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7843 		fw_err_info.event_id = event_type;
7844 		fw_err_info.event_mask = &event_mask;
7845 		hl_handle_fw_err(hdev, &fw_err_info);
7846 		goto reset_device;
7847 
7848 	default:
7849 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7850 				event_type);
7851 		break;
7852 	}
7853 
7854 	if (event_mask)
7855 		hl_notifier_event_send_all(hdev, event_mask);
7856 
7857 	return;
7858 
7859 reset_device:
7860 	reset_required = true;
7861 
7862 	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7863 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7864 
7865 		/* notify on device unavailable while the reset triggered by fw */
7866 		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7867 					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7868 	} else if (hdev->hard_reset_on_fw_events) {
7869 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7870 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7871 	} else {
7872 		reset_required = false;
7873 	}
7874 
7875 	if (reset_required) {
7876 		/* escalate general hw errors to critical/fatal error */
7877 		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7878 			hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7879 
7880 		hl_device_cond_reset(hdev, flags, event_mask);
7881 	} else {
7882 		hl_fw_unmask_irq(hdev, event_type);
7883 		/* Notification on occurred event needs to be sent although reset is not executed */
7884 		if (event_mask)
7885 			hl_notifier_event_send_all(hdev, event_mask);
7886 	}
7887 }
7888 
7889 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7890 {
7891 	struct gaudi_device *gaudi = hdev->asic_specific;
7892 
7893 	if (aggregate) {
7894 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7895 		return gaudi->events_stat_aggregate;
7896 	}
7897 
7898 	*size = (u32) sizeof(gaudi->events_stat);
7899 	return gaudi->events_stat;
7900 }
7901 
7902 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7903 {
7904 	struct gaudi_device *gaudi = hdev->asic_specific;
7905 	u32 status, timeout_usec;
7906 	int rc;
7907 
7908 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7909 		hdev->reset_info.hard_reset_pending)
7910 		return 0;
7911 
7912 	if (hdev->pldm)
7913 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7914 	else
7915 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7916 
7917 	/* L0 & L1 invalidation */
7918 	WREG32(mmSTLB_INV_PS, 3);
7919 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7920 	WREG32(mmSTLB_INV_PS, 2);
7921 
7922 	rc = hl_poll_timeout(
7923 		hdev,
7924 		mmSTLB_INV_PS,
7925 		status,
7926 		!status,
7927 		1000,
7928 		timeout_usec);
7929 
7930 	WREG32(mmSTLB_INV_SET, 0);
7931 
7932 	return rc;
7933 }
7934 
7935 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7936 						bool is_hard, u32 flags,
7937 						u32 asid, u64 va, u64 size)
7938 {
7939 	/* Treat as invalidate all because there is no range invalidation
7940 	 * in Gaudi
7941 	 */
7942 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7943 }
7944 
7945 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7946 {
7947 	u32 status, timeout_usec;
7948 	int rc;
7949 
7950 	if (hdev->pldm)
7951 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7952 	else
7953 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7954 
7955 	WREG32(MMU_ASID, asid);
7956 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7957 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7958 	WREG32(MMU_BUSY, 0x80000000);
7959 
7960 	rc = hl_poll_timeout(
7961 		hdev,
7962 		MMU_BUSY,
7963 		status,
7964 		!(status & 0x80000000),
7965 		1000,
7966 		timeout_usec);
7967 
7968 	if (rc) {
7969 		dev_err(hdev->dev,
7970 			"Timeout during MMU hop0 config of asid %d\n", asid);
7971 		return rc;
7972 	}
7973 
7974 	return 0;
7975 }
7976 
7977 static int gaudi_send_heartbeat(struct hl_device *hdev)
7978 {
7979 	struct gaudi_device *gaudi = hdev->asic_specific;
7980 
7981 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7982 		return 0;
7983 
7984 	return hl_fw_send_heartbeat(hdev);
7985 }
7986 
7987 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7988 {
7989 	struct gaudi_device *gaudi = hdev->asic_specific;
7990 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7991 	int rc;
7992 
7993 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7994 		return 0;
7995 
7996 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
7997 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
7998 					mmCPU_BOOT_ERR1);
7999 	if (rc)
8000 		return rc;
8001 
8002 	if (!strlen(prop->cpucp_info.card_name))
8003 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8004 				CARD_NAME_MAX_LEN);
8005 
8006 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8007 
8008 	set_default_power_values(hdev);
8009 
8010 	return 0;
8011 }
8012 
8013 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8014 		struct engines_data *e)
8015 {
8016 	struct gaudi_device *gaudi = hdev->asic_specific;
8017 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8018 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8019 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8020 	unsigned long *mask = (unsigned long *)mask_arr;
8021 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8022 	bool is_idle = true, is_eng_idle, is_slave;
8023 	u64 offset;
8024 	int i, dma_id, port;
8025 
8026 	if (e)
8027 		hl_engine_data_sprintf(e,
8028 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8029 			"---  -------  ------------  ----------  -------------\n");
8030 
8031 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8032 		dma_id = gaudi_dma_assignment[i];
8033 		offset = dma_id * DMA_QMAN_OFFSET;
8034 
8035 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8036 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8037 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8038 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8039 				IS_DMA_IDLE(dma_core_sts0);
8040 		is_idle &= is_eng_idle;
8041 
8042 		if (mask && !is_eng_idle)
8043 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8044 		if (e)
8045 			hl_engine_data_sprintf(e, fmt, dma_id,
8046 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8047 				qm_cgm_sts, dma_core_sts0);
8048 	}
8049 
8050 	if (e)
8051 		hl_engine_data_sprintf(e,
8052 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8053 			"---  -------  ------------  ----------  ----------\n");
8054 
8055 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8056 		offset = i * TPC_QMAN_OFFSET;
8057 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8058 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8059 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8060 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8061 				IS_TPC_IDLE(tpc_cfg_sts);
8062 		is_idle &= is_eng_idle;
8063 
8064 		if (mask && !is_eng_idle)
8065 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8066 		if (e)
8067 			hl_engine_data_sprintf(e, fmt, i,
8068 				is_eng_idle ? "Y" : "N",
8069 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8070 	}
8071 
8072 	if (e)
8073 		hl_engine_data_sprintf(e,
8074 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8075 			"---  -------  ------------  ----------  -----------\n");
8076 
8077 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8078 		offset = i * MME_QMAN_OFFSET;
8079 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8080 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8081 
8082 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8083 		is_slave = i % 2;
8084 		if (!is_slave) {
8085 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8086 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8087 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8088 		}
8089 
8090 		is_idle &= is_eng_idle;
8091 
8092 		if (mask && !is_eng_idle)
8093 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8094 		if (e) {
8095 			if (!is_slave)
8096 				hl_engine_data_sprintf(e, fmt, i,
8097 					is_eng_idle ? "Y" : "N",
8098 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8099 			else
8100 				hl_engine_data_sprintf(e, mme_slave_fmt, i,
8101 					is_eng_idle ? "Y" : "N", "-",
8102 					"-", mme_arch_sts);
8103 		}
8104 	}
8105 
8106 	if (e)
8107 		hl_engine_data_sprintf(e,
8108 				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8109 				"---  -------  ------------  ----------\n");
8110 
8111 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8112 		offset = i * NIC_MACRO_QMAN_OFFSET;
8113 		port = 2 * i;
8114 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8115 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8116 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8117 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8118 			is_idle &= is_eng_idle;
8119 
8120 			if (mask && !is_eng_idle)
8121 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8122 			if (e)
8123 				hl_engine_data_sprintf(e, nic_fmt, port,
8124 						is_eng_idle ? "Y" : "N",
8125 						qm_glbl_sts0, qm_cgm_sts);
8126 		}
8127 
8128 		port = 2 * i + 1;
8129 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8130 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8131 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8132 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8133 			is_idle &= is_eng_idle;
8134 
8135 			if (mask && !is_eng_idle)
8136 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8137 			if (e)
8138 				hl_engine_data_sprintf(e, nic_fmt, port,
8139 						is_eng_idle ? "Y" : "N",
8140 						qm_glbl_sts0, qm_cgm_sts);
8141 		}
8142 	}
8143 
8144 	if (e)
8145 		hl_engine_data_sprintf(e, "\n");
8146 
8147 	return is_idle;
8148 }
8149 
8150 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8151 	__acquires(&gaudi->hw_queues_lock)
8152 {
8153 	struct gaudi_device *gaudi = hdev->asic_specific;
8154 
8155 	spin_lock(&gaudi->hw_queues_lock);
8156 }
8157 
8158 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8159 	__releases(&gaudi->hw_queues_lock)
8160 {
8161 	struct gaudi_device *gaudi = hdev->asic_specific;
8162 
8163 	spin_unlock(&gaudi->hw_queues_lock);
8164 }
8165 
8166 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8167 {
8168 	return hdev->pdev->device;
8169 }
8170 
8171 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8172 				size_t max_size)
8173 {
8174 	struct gaudi_device *gaudi = hdev->asic_specific;
8175 
8176 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8177 		return 0;
8178 
8179 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8180 }
8181 
8182 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8183 {
8184 	struct gaudi_device *gaudi = hdev->asic_specific;
8185 
8186 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8187 		return 0;
8188 
8189 	return hl_fw_get_monitor_dump(hdev, data);
8190 }
8191 
8192 /*
8193  * this function should be used only during initialization and/or after reset,
8194  * when there are no active users.
8195  */
8196 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
8197 {
8198 	u64 kernel_timeout;
8199 	u32 status, offset;
8200 	int rc;
8201 
8202 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8203 
8204 	if (hdev->pldm)
8205 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8206 	else
8207 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8208 
8209 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8210 			lower_32_bits(tpc_kernel));
8211 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8212 			upper_32_bits(tpc_kernel));
8213 
8214 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8215 			lower_32_bits(tpc_kernel));
8216 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8217 			upper_32_bits(tpc_kernel));
8218 	/* set a valid LUT pointer, content is of no significance */
8219 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8220 			lower_32_bits(tpc_kernel));
8221 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8222 			upper_32_bits(tpc_kernel));
8223 
8224 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8225 			lower_32_bits(CFG_BASE +
8226 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8227 
8228 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8229 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8230 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8231 	/* wait a bit for the engine to start executing */
8232 	usleep_range(1000, 1500);
8233 
8234 	/* wait until engine has finished executing */
8235 	rc = hl_poll_timeout(
8236 		hdev,
8237 		mmTPC0_CFG_STATUS + offset,
8238 		status,
8239 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8240 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8241 		1000,
8242 		kernel_timeout);
8243 
8244 	if (rc) {
8245 		dev_err(hdev->dev,
8246 			"Timeout while waiting for TPC%d icache prefetch\n",
8247 			tpc_id);
8248 		return -EIO;
8249 	}
8250 
8251 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8252 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8253 
8254 	/* wait a bit for the engine to start executing */
8255 	usleep_range(1000, 1500);
8256 
8257 	/* wait until engine has finished executing */
8258 	rc = hl_poll_timeout(
8259 		hdev,
8260 		mmTPC0_CFG_STATUS + offset,
8261 		status,
8262 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8263 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8264 		1000,
8265 		kernel_timeout);
8266 
8267 	if (rc) {
8268 		dev_err(hdev->dev,
8269 			"Timeout while waiting for TPC%d vector pipe\n",
8270 			tpc_id);
8271 		return -EIO;
8272 	}
8273 
8274 	rc = hl_poll_timeout(
8275 		hdev,
8276 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8277 		status,
8278 		(status == 0),
8279 		1000,
8280 		kernel_timeout);
8281 
8282 	if (rc) {
8283 		dev_err(hdev->dev,
8284 			"Timeout while waiting for TPC%d kernel to execute\n",
8285 			tpc_id);
8286 		return -EIO;
8287 	}
8288 
8289 	return 0;
8290 }
8291 
8292 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8293 		struct hl_ctx *ctx)
8294 {
8295 	struct gaudi_device *gaudi = hdev->asic_specific;
8296 	int min_alloc_order, rc, collective_cb_size;
8297 
8298 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8299 		return 0;
8300 
8301 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8302 							HOST_SPACE_INTERNAL_CB_SZ,
8303 							&hdev->internal_cb_pool_dma_addr,
8304 							GFP_KERNEL | __GFP_ZERO);
8305 
8306 	if (!hdev->internal_cb_pool_virt_addr)
8307 		return -ENOMEM;
8308 
8309 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8310 			sizeof(struct packet_fence);
8311 	min_alloc_order = ilog2(collective_cb_size);
8312 
8313 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8314 	if (!hdev->internal_cb_pool) {
8315 		dev_err(hdev->dev,
8316 			"Failed to create internal CB pool\n");
8317 		rc = -ENOMEM;
8318 		goto free_internal_cb_pool;
8319 	}
8320 
8321 	rc = gen_pool_add(hdev->internal_cb_pool,
8322 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8323 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8324 	if (rc) {
8325 		dev_err(hdev->dev,
8326 			"Failed to add memory to internal CB pool\n");
8327 		rc = -EFAULT;
8328 		goto destroy_internal_cb_pool;
8329 	}
8330 
8331 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8332 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8333 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8334 
8335 	if (!hdev->internal_cb_va_base) {
8336 		rc = -ENOMEM;
8337 		goto destroy_internal_cb_pool;
8338 	}
8339 
8340 	mutex_lock(&hdev->mmu_lock);
8341 
8342 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8343 			hdev->internal_cb_pool_dma_addr,
8344 			HOST_SPACE_INTERNAL_CB_SZ);
8345 	if (rc)
8346 		goto unreserve_internal_cb_pool;
8347 
8348 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8349 	if (rc)
8350 		goto unmap_internal_cb_pool;
8351 
8352 	mutex_unlock(&hdev->mmu_lock);
8353 
8354 	return 0;
8355 
8356 unmap_internal_cb_pool:
8357 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8358 			HOST_SPACE_INTERNAL_CB_SZ);
8359 unreserve_internal_cb_pool:
8360 	mutex_unlock(&hdev->mmu_lock);
8361 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8362 			HOST_SPACE_INTERNAL_CB_SZ);
8363 destroy_internal_cb_pool:
8364 	gen_pool_destroy(hdev->internal_cb_pool);
8365 free_internal_cb_pool:
8366 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8367 					hdev->internal_cb_pool_dma_addr);
8368 
8369 	return rc;
8370 }
8371 
8372 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8373 		struct hl_ctx *ctx)
8374 {
8375 	struct gaudi_device *gaudi = hdev->asic_specific;
8376 
8377 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8378 		return;
8379 
8380 	mutex_lock(&hdev->mmu_lock);
8381 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8382 			HOST_SPACE_INTERNAL_CB_SZ);
8383 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8384 			HOST_SPACE_INTERNAL_CB_SZ);
8385 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8386 	mutex_unlock(&hdev->mmu_lock);
8387 
8388 	gen_pool_destroy(hdev->internal_cb_pool);
8389 
8390 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8391 					hdev->internal_cb_pool_dma_addr);
8392 }
8393 
8394 static int gaudi_ctx_init(struct hl_ctx *ctx)
8395 {
8396 	int rc;
8397 
8398 	if (ctx->asid == HL_KERNEL_ASID_ID)
8399 		return 0;
8400 
8401 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8402 	if (rc)
8403 		return rc;
8404 
8405 	rc = gaudi_restore_user_registers(ctx->hdev);
8406 	if (rc)
8407 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8408 
8409 	return rc;
8410 }
8411 
8412 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8413 {
8414 	if (ctx->asid == HL_KERNEL_ASID_ID)
8415 		return;
8416 
8417 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8418 }
8419 
/*
 * gaudi_pre_schedule_cs - pre-schedule hook for a command submission
 * @cs: command submission (unused).
 *
 * No work is done here for Gaudi; always returns 0.
 */
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}
8424 
8425 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8426 {
8427 	return gaudi_cq_assignment[cq_idx];
8428 }
8429 
8430 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8431 {
8432 	return sizeof(struct packet_msg_short) +
8433 			sizeof(struct packet_msg_prot) * 2;
8434 }
8435 
8436 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8437 {
8438 	return sizeof(struct packet_msg_short) * 4 +
8439 			sizeof(struct packet_fence) +
8440 			sizeof(struct packet_msg_prot) * 2;
8441 }
8442 
8443 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8444 {
8445 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8446 }
8447 
8448 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8449 				u32 size, bool eb)
8450 {
8451 	struct hl_cb *cb = (struct hl_cb *) data;
8452 	struct packet_msg_short *pkt;
8453 	u32 value, ctl, pkt_size = sizeof(*pkt);
8454 
8455 	pkt = cb->kernel_address + size;
8456 	memset(pkt, 0, pkt_size);
8457 
8458 	/* Inc by 1, Mode ADD */
8459 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8460 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8461 
8462 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8463 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8464 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8465 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8466 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8467 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8468 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8469 
8470 	pkt->value = cpu_to_le32(value);
8471 	pkt->ctl = cpu_to_le32(ctl);
8472 
8473 	return size + pkt_size;
8474 }
8475 
8476 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8477 					u16 addr)
8478 {
8479 	u32 ctl, pkt_size = sizeof(*pkt);
8480 
8481 	memset(pkt, 0, pkt_size);
8482 
8483 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8484 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8485 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8486 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8487 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8488 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8489 
8490 	pkt->value = cpu_to_le32(value);
8491 	pkt->ctl = cpu_to_le32(ctl);
8492 
8493 	return pkt_size;
8494 }
8495 
8496 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8497 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8498 		u16 sob_val, u16 mon_id)
8499 {
8500 	u64 monitor_base;
8501 	u32 ctl, value, pkt_size = sizeof(*pkt);
8502 	u16 msg_addr_offset;
8503 	u8 mask;
8504 
8505 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8506 		dev_err(hdev->dev,
8507 			"sob_base %u (mask %#x) is not valid\n",
8508 			sob_base, sob_mask);
8509 		return 0;
8510 	}
8511 
8512 	/*
8513 	 * monitor_base should be the content of the base0 address registers,
8514 	 * so it will be added to the msg short offsets
8515 	 */
8516 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8517 
8518 	msg_addr_offset =
8519 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8520 				monitor_base;
8521 
8522 	memset(pkt, 0, pkt_size);
8523 
8524 	/* Monitor config packet: bind the monitor to a sync object */
8525 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8526 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8527 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8528 			0); /* GREATER OR EQUAL*/
8529 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8530 
8531 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8532 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8533 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8534 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8535 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8536 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8537 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8538 
8539 	pkt->value = cpu_to_le32(value);
8540 	pkt->ctl = cpu_to_le32(ctl);
8541 
8542 	return pkt_size;
8543 }
8544 
8545 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8546 {
8547 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8548 
8549 	memset(pkt, 0, pkt_size);
8550 
8551 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8552 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8553 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8554 
8555 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8556 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8557 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8558 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8559 
8560 	pkt->cfg = cpu_to_le32(cfg);
8561 	pkt->ctl = cpu_to_le32(ctl);
8562 
8563 	return pkt_size;
8564 }
8565 
8566 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8567 {
8568 	u32 offset, nic_index;
8569 
8570 	switch (queue_id) {
8571 	case GAUDI_QUEUE_ID_DMA_0_0:
8572 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8573 		break;
8574 	case GAUDI_QUEUE_ID_DMA_0_1:
8575 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8576 		break;
8577 	case GAUDI_QUEUE_ID_DMA_0_2:
8578 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8579 		break;
8580 	case GAUDI_QUEUE_ID_DMA_0_3:
8581 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8582 		break;
8583 	case GAUDI_QUEUE_ID_DMA_1_0:
8584 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8585 		break;
8586 	case GAUDI_QUEUE_ID_DMA_1_1:
8587 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8588 		break;
8589 	case GAUDI_QUEUE_ID_DMA_1_2:
8590 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8591 		break;
8592 	case GAUDI_QUEUE_ID_DMA_1_3:
8593 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8594 		break;
8595 	case GAUDI_QUEUE_ID_DMA_5_0:
8596 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8597 		break;
8598 	case GAUDI_QUEUE_ID_DMA_5_1:
8599 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8600 		break;
8601 	case GAUDI_QUEUE_ID_DMA_5_2:
8602 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8603 		break;
8604 	case GAUDI_QUEUE_ID_DMA_5_3:
8605 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8606 		break;
8607 	case GAUDI_QUEUE_ID_TPC_7_0:
8608 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8609 		break;
8610 	case GAUDI_QUEUE_ID_TPC_7_1:
8611 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8612 		break;
8613 	case GAUDI_QUEUE_ID_TPC_7_2:
8614 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8615 		break;
8616 	case GAUDI_QUEUE_ID_TPC_7_3:
8617 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8618 		break;
8619 	case GAUDI_QUEUE_ID_NIC_0_0:
8620 	case GAUDI_QUEUE_ID_NIC_1_0:
8621 	case GAUDI_QUEUE_ID_NIC_2_0:
8622 	case GAUDI_QUEUE_ID_NIC_3_0:
8623 	case GAUDI_QUEUE_ID_NIC_4_0:
8624 	case GAUDI_QUEUE_ID_NIC_5_0:
8625 	case GAUDI_QUEUE_ID_NIC_6_0:
8626 	case GAUDI_QUEUE_ID_NIC_7_0:
8627 	case GAUDI_QUEUE_ID_NIC_8_0:
8628 	case GAUDI_QUEUE_ID_NIC_9_0:
8629 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8630 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8631 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8632 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8633 		break;
8634 	case GAUDI_QUEUE_ID_NIC_0_1:
8635 	case GAUDI_QUEUE_ID_NIC_1_1:
8636 	case GAUDI_QUEUE_ID_NIC_2_1:
8637 	case GAUDI_QUEUE_ID_NIC_3_1:
8638 	case GAUDI_QUEUE_ID_NIC_4_1:
8639 	case GAUDI_QUEUE_ID_NIC_5_1:
8640 	case GAUDI_QUEUE_ID_NIC_6_1:
8641 	case GAUDI_QUEUE_ID_NIC_7_1:
8642 	case GAUDI_QUEUE_ID_NIC_8_1:
8643 	case GAUDI_QUEUE_ID_NIC_9_1:
8644 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8645 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8646 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8647 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8648 		break;
8649 	case GAUDI_QUEUE_ID_NIC_0_2:
8650 	case GAUDI_QUEUE_ID_NIC_1_2:
8651 	case GAUDI_QUEUE_ID_NIC_2_2:
8652 	case GAUDI_QUEUE_ID_NIC_3_2:
8653 	case GAUDI_QUEUE_ID_NIC_4_2:
8654 	case GAUDI_QUEUE_ID_NIC_5_2:
8655 	case GAUDI_QUEUE_ID_NIC_6_2:
8656 	case GAUDI_QUEUE_ID_NIC_7_2:
8657 	case GAUDI_QUEUE_ID_NIC_8_2:
8658 	case GAUDI_QUEUE_ID_NIC_9_2:
8659 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8660 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8661 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8662 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8663 		break;
8664 	case GAUDI_QUEUE_ID_NIC_0_3:
8665 	case GAUDI_QUEUE_ID_NIC_1_3:
8666 	case GAUDI_QUEUE_ID_NIC_2_3:
8667 	case GAUDI_QUEUE_ID_NIC_3_3:
8668 	case GAUDI_QUEUE_ID_NIC_4_3:
8669 	case GAUDI_QUEUE_ID_NIC_5_3:
8670 	case GAUDI_QUEUE_ID_NIC_6_3:
8671 	case GAUDI_QUEUE_ID_NIC_7_3:
8672 	case GAUDI_QUEUE_ID_NIC_8_3:
8673 	case GAUDI_QUEUE_ID_NIC_9_3:
8674 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8675 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8676 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8677 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8678 		break;
8679 	default:
8680 		return -EINVAL;
8681 	}
8682 
8683 	*addr = CFG_BASE + offset;
8684 
8685 	return 0;
8686 }
8687 
8688 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8689 {
8690 	u64 monitor_base;
8691 	u32 size = 0;
8692 	u16 msg_addr_offset;
8693 
8694 	/*
8695 	 * monitor_base should be the content of the base0 address registers,
8696 	 * so it will be added to the msg short offsets
8697 	 */
8698 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8699 
8700 	/* First monitor config packet: low address of the sync */
8701 	msg_addr_offset =
8702 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8703 				monitor_base;
8704 
8705 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8706 					msg_addr_offset);
8707 
8708 	/* Second monitor config packet: high address of the sync */
8709 	msg_addr_offset =
8710 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8711 				monitor_base;
8712 
8713 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8714 					msg_addr_offset);
8715 
8716 	/*
8717 	 * Third monitor config packet: the payload, i.e. what to write when the
8718 	 * sync triggers
8719 	 */
8720 	msg_addr_offset =
8721 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8722 				monitor_base;
8723 
8724 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8725 
8726 	return size;
8727 }
8728 
8729 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8730 				struct hl_gen_wait_properties *prop)
8731 {
8732 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8733 	void *buf = cb->kernel_address;
8734 	u64 fence_addr = 0;
8735 	u32 size = prop->size;
8736 
8737 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8738 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8739 				prop->q_idx);
8740 		return 0;
8741 	}
8742 
8743 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8744 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8745 			prop->sob_mask, prop->sob_val, prop->mon_id);
8746 	size += gaudi_add_fence_pkt(buf + size);
8747 
8748 	return size;
8749 }
8750 
8751 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8752 {
8753 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8754 
8755 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8756 		hw_sob->sob_id);
8757 
8758 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8759 			hw_sob->sob_id * 4, 0);
8760 
8761 	kref_init(&hw_sob->kref);
8762 }
8763 
8764 static u64 gaudi_get_device_time(struct hl_device *hdev)
8765 {
8766 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8767 
8768 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8769 }
8770 
8771 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8772 				u32 *block_size, u32 *block_id)
8773 {
8774 	return -EPERM;
8775 }
8776 
8777 static int gaudi_block_mmap(struct hl_device *hdev,
8778 				struct vm_area_struct *vma,
8779 				u32 block_id, u32 block_size)
8780 {
8781 	return -EPERM;
8782 }
8783 
8784 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8785 {
8786 	struct cpu_dyn_regs *dyn_regs =
8787 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8788 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8789 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8790 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8791 
8792 	WREG32(irq_handler_offset,
8793 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8794 }
8795 
8796 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8797 {
8798 	return -EINVAL;
8799 }
8800 
8801 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8802 {
8803 	switch (pll_idx) {
8804 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8805 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8806 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8807 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8808 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8809 	case HL_GAUDI_MME_PLL: return MME_PLL;
8810 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8811 	case HL_GAUDI_IF_PLL: return IF_PLL;
8812 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8813 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8814 	default: return -EINVAL;
8815 	}
8816 }
8817 
8818 static int gaudi_add_sync_to_engine_map_entry(
8819 	struct hl_sync_to_engine_map *map, u32 reg_value,
8820 	enum hl_sync_engine_type engine_type, u32 engine_id)
8821 {
8822 	struct hl_sync_to_engine_map_entry *entry;
8823 
8824 	/* Reg value represents a partial address of sync object,
8825 	 * it is used as unique identifier. For this we need to
8826 	 * clear the cutoff cfg base bits from the value.
8827 	 */
8828 	if (reg_value == 0 || reg_value == 0xffffffff)
8829 		return 0;
8830 	reg_value -= lower_32_bits(CFG_BASE);
8831 
8832 	/* create a new hash entry */
8833 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8834 	if (!entry)
8835 		return -ENOMEM;
8836 	entry->engine_type = engine_type;
8837 	entry->engine_id = engine_id;
8838 	entry->sync_id = reg_value;
8839 	hash_add(map->tb, &entry->node, reg_value);
8840 
8841 	return 0;
8842 }
8843 
8844 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8845 				struct hl_sync_to_engine_map *map)
8846 {
8847 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8848 	int i, j, rc;
8849 	u32 reg_value;
8850 
8851 	/* Iterate over TPC engines */
8852 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8853 
8854 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8855 					sds->props[SP_NEXT_TPC] * i);
8856 
8857 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8858 							ENGINE_TPC, i);
8859 		if (rc)
8860 			goto free_sync_to_engine_map;
8861 	}
8862 
8863 	/* Iterate over MME engines */
8864 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8865 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8866 
8867 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8868 						sds->props[SP_NEXT_MME] * i +
8869 						j * sizeof(u32));
8870 
8871 			rc = gaudi_add_sync_to_engine_map_entry(
8872 				map, reg_value, ENGINE_MME,
8873 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8874 			if (rc)
8875 				goto free_sync_to_engine_map;
8876 		}
8877 	}
8878 
8879 	/* Iterate over DMA engines */
8880 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8881 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8882 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8883 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8884 							ENGINE_DMA, i);
8885 		if (rc)
8886 			goto free_sync_to_engine_map;
8887 	}
8888 
8889 	return 0;
8890 
8891 free_sync_to_engine_map:
8892 	hl_state_dump_free_sync_to_engine_map(map);
8893 
8894 	return rc;
8895 }
8896 
8897 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8898 {
8899 	return FIELD_GET(
8900 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8901 		mon->status);
8902 }
8903 
8904 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8905 {
8906 	const size_t max_write = 10;
8907 	u32 gid, mask, sob;
8908 	int i, offset;
8909 
8910 	/* Sync object ID is calculated as follows:
8911 	 * (8 * group_id + cleared bits in mask)
8912 	 */
8913 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8914 			mon->arm_data);
8915 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8916 			mon->arm_data);
8917 
8918 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8919 		max_write; mask >>= 1, i++) {
8920 		if (!(mask & 1)) {
8921 			sob = gid * MONITOR_MAX_SOBS + i;
8922 
8923 			if (offset > 0)
8924 				offset += snprintf(sobs + offset, max_write,
8925 							", ");
8926 
8927 			offset += snprintf(sobs + offset, max_write, "%u", sob);
8928 		}
8929 	}
8930 }
8931 
8932 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8933 				struct hl_device *hdev,
8934 				struct hl_mon_state_dump *mon)
8935 {
8936 	const char *name;
8937 	char scratch_buf1[BIN_REG_STRING_SIZE],
8938 		scratch_buf2[BIN_REG_STRING_SIZE];
8939 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8940 
8941 	name = hl_state_dump_get_monitor_name(hdev, mon);
8942 	if (!name)
8943 		name = "";
8944 
8945 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8946 
8947 	return hl_snprintf_resize(
8948 		buf, size, offset,
8949 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8950 		mon->id, name,
8951 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8952 				mon->arm_data),
8953 		hl_format_as_binary(
8954 			scratch_buf1, sizeof(scratch_buf1),
8955 			FIELD_GET(
8956 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8957 				mon->arm_data)),
8958 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8959 				mon->arm_data),
8960 		mon->wr_data,
8961 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8962 		hl_format_as_binary(
8963 			scratch_buf2, sizeof(scratch_buf2),
8964 			FIELD_GET(
8965 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8966 				mon->status)),
8967 		monitored_sobs);
8968 }
8969 
8970 
8971 static int gaudi_print_fences_single_engine(
8972 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8973 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8974 	size_t *size, size_t *offset)
8975 {
8976 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8977 	int rc = -ENOMEM, i;
8978 	u32 *statuses, *fences;
8979 
8980 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8981 			sizeof(*statuses), GFP_KERNEL);
8982 	if (!statuses)
8983 		goto out;
8984 
8985 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8986 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
8987 			 sizeof(*fences), GFP_KERNEL);
8988 	if (!fences)
8989 		goto free_status;
8990 
8991 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
8992 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
8993 
8994 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
8995 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
8996 		fences[i] = RREG32(base_offset + i * sizeof(u32));
8997 
8998 	/* The actual print */
8999 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9000 		u32 fence_id;
9001 		u64 fence_cnt, fence_rdata;
9002 		const char *engine_name;
9003 
9004 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9005 			statuses[i]))
9006 			continue;
9007 
9008 		fence_id =
9009 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9010 		fence_cnt = base_offset + CFG_BASE +
9011 			sizeof(u32) *
9012 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9013 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9014 				sds->props[SP_FENCE0_RDATA_OFFSET];
9015 		engine_name = hl_sync_engine_to_string(engine_type);
9016 
9017 		rc = hl_snprintf_resize(
9018 			buf, size, offset,
9019 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9020 			engine_name, engine_id,
9021 			i, fence_id,
9022 			fence_cnt, engine_name, engine_id, fence_id, i,
9023 			fence_rdata, engine_name, engine_id, fence_id, i,
9024 			fences[fence_id],
9025 			statuses[i]);
9026 		if (rc)
9027 			goto free_fences;
9028 	}
9029 
9030 	rc = 0;
9031 
9032 free_fences:
9033 	kfree(fences);
9034 free_status:
9035 	kfree(statuses);
9036 out:
9037 	return rc;
9038 }
9039 
9040 
/*
 * Gaudi implementations of the state dump callbacks; copied into
 * hdev->state_dump_specs.funcs by gaudi_state_dump_init().
 */
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
9047 
9048 static void gaudi_state_dump_init(struct hl_device *hdev)
9049 {
9050 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9051 	int i;
9052 
9053 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9054 		hash_add(sds->so_id_to_str_tb,
9055 			&gaudi_so_id_to_str[i].node,
9056 			gaudi_so_id_to_str[i].id);
9057 
9058 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9059 		hash_add(sds->monitor_id_to_str_tb,
9060 			&gaudi_monitor_id_to_str[i].node,
9061 			gaudi_monitor_id_to_str[i].id);
9062 
9063 	sds->props = gaudi_state_dump_specs_props;
9064 
9065 	sds->sync_namager_names = gaudi_sync_manager_names;
9066 
9067 	sds->funcs = gaudi_state_dump_funcs;
9068 }
9069 
9070 static u32 *gaudi_get_stream_master_qid_arr(void)
9071 {
9072 	return gaudi_stream_master;
9073 }
9074 
/*
 * gaudi_set_dram_properties - DRAM properties hook
 * @hdev: habanalabs device (unused).
 *
 * No work is done here for Gaudi; always returns 0.
 */
static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}
9079 
/*
 * gaudi_set_binning_masks - binning masks hook
 * @hdev: habanalabs device (unused).
 *
 * No work is done here for Gaudi; always returns 0.
 */
static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}
9084 
/*
 * gaudi_check_if_razwi_happened - RAZWI check hook
 * @hdev: habanalabs device (unused).
 *
 * Intentionally empty for Gaudi.
 */
static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}
9088 
9089 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9090 {
9091 	struct hl_device *hdev = dev_get_drvdata(dev);
9092 	struct cpucp_info *cpucp_info;
9093 
9094 	cpucp_info = &hdev->asic_prop.cpucp_info;
9095 
9096 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9097 }
9098 
/* Read-only device attribute backed by infineon_ver_show() */
static DEVICE_ATTR_RO(infineon_ver);

/*
 * NULL-terminated attribute list installed as the VRM attribute group's
 * attrs by gaudi_add_device_attr().
 */
static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};
9105 
9106 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9107 					struct attribute_group *dev_vrm_attr_grp)
9108 {
9109 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9110 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9111 }
9112 
/* Stub callback: @open is ignored, nothing is sent, always returns 0. */
static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
9117 
9118 static const struct hl_asic_funcs gaudi_funcs = {
9119 	.early_init = gaudi_early_init,
9120 	.early_fini = gaudi_early_fini,
9121 	.late_init = gaudi_late_init,
9122 	.late_fini = gaudi_late_fini,
9123 	.sw_init = gaudi_sw_init,
9124 	.sw_fini = gaudi_sw_fini,
9125 	.hw_init = gaudi_hw_init,
9126 	.hw_fini = gaudi_hw_fini,
9127 	.halt_engines = gaudi_halt_engines,
9128 	.suspend = gaudi_suspend,
9129 	.resume = gaudi_resume,
9130 	.mmap = gaudi_mmap,
9131 	.ring_doorbell = gaudi_ring_doorbell,
9132 	.pqe_write = gaudi_pqe_write,
9133 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9134 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9135 	.scrub_device_mem = gaudi_scrub_device_mem,
9136 	.scrub_device_dram = gaudi_scrub_device_dram,
9137 	.get_int_queue_base = gaudi_get_int_queue_base,
9138 	.test_queues = gaudi_test_queues,
9139 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9140 	.asic_dma_pool_free = gaudi_dma_pool_free,
9141 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9142 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9143 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9144 	.cs_parser = gaudi_cs_parser,
9145 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
9146 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9147 	.update_eq_ci = gaudi_update_eq_ci,
9148 	.context_switch = gaudi_context_switch,
9149 	.restore_phase_topology = gaudi_restore_phase_topology,
9150 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9151 	.add_device_attr = gaudi_add_device_attr,
9152 	.handle_eqe = gaudi_handle_eqe,
9153 	.get_events_stat = gaudi_get_events_stat,
9154 	.read_pte = gaudi_read_pte,
9155 	.write_pte = gaudi_write_pte,
9156 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9157 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9158 	.mmu_prefetch_cache_range = NULL,
9159 	.send_heartbeat = gaudi_send_heartbeat,
9160 	.debug_coresight = gaudi_debug_coresight,
9161 	.is_device_idle = gaudi_is_device_idle,
9162 	.compute_reset_late_init = gaudi_compute_reset_late_init,
9163 	.hw_queues_lock = gaudi_hw_queues_lock,
9164 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9165 	.get_pci_id = gaudi_get_pci_id,
9166 	.get_eeprom_data = gaudi_get_eeprom_data,
9167 	.get_monitor_dump = gaudi_get_monitor_dump,
9168 	.send_cpu_message = gaudi_send_cpu_message,
9169 	.pci_bars_map = gaudi_pci_bars_map,
9170 	.init_iatu = gaudi_init_iatu,
9171 	.rreg = hl_rreg,
9172 	.wreg = hl_wreg,
9173 	.halt_coresight = gaudi_halt_coresight,
9174 	.ctx_init = gaudi_ctx_init,
9175 	.ctx_fini = gaudi_ctx_fini,
9176 	.pre_schedule_cs = gaudi_pre_schedule_cs,
9177 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9178 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9179 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9180 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9181 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9182 	.gen_signal_cb = gaudi_gen_signal_cb,
9183 	.gen_wait_cb = gaudi_gen_wait_cb,
9184 	.reset_sob = gaudi_reset_sob,
9185 	.reset_sob_group = gaudi_reset_sob_group,
9186 	.get_device_time = gaudi_get_device_time,
9187 	.pb_print_security_errors = NULL,
9188 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9189 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9190 	.get_dec_base_addr = NULL,
9191 	.scramble_addr = hl_mmu_scramble_addr,
9192 	.descramble_addr = hl_mmu_descramble_addr,
9193 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9194 	.get_hw_block_id = gaudi_get_hw_block_id,
9195 	.hw_block_mmap = gaudi_block_mmap,
9196 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9197 	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9198 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9199 	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9200 	.init_firmware_loader = gaudi_init_firmware_loader,
9201 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9202 	.state_dump_init = gaudi_state_dump_init,
9203 	.get_sob_addr = gaudi_get_sob_addr,
9204 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9205 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9206 	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9207 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9208 	.access_dev_mem = hl_access_dev_mem,
9209 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9210 	.send_device_activity = gaudi_send_device_activity,
9211 	.set_dram_properties = gaudi_set_dram_properties,
9212 	.set_binning_masks = gaudi_set_binning_masks,
9213 };
9214 
9215 /**
9216  * gaudi_set_asic_funcs - set GAUDI function pointers
9217  *
9218  * @hdev: pointer to hl_device structure
9219  *
9220  */
9221 void gaudi_set_asic_funcs(struct hl_device *hdev)
9222 {
9223 	hdev->asic_funcs = &gaudi_funcs;
9224 }
9225