xref: /openbmc/linux/drivers/accel/habanalabs/gaudi/gaudi.c (revision 25ebbc57ca56df3cf9149e9da6b1d3169c8487db)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMAN):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is always not
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
67 
68 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
72 
73 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
82 
83 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
84 
85 #define GAUDI_MAX_STRING_LEN		20
86 
87 #define GAUDI_CB_POOL_CB_CNT		512
88 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
89 
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
91 
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
93 
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
95 
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
97 
98 #define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */
99 
100 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
101 
102 #define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
103 
104 #define MONITOR_SOB_STRING_SIZE		256
105 
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107 	GAUDI_QUEUE_ID_DMA_0_0,
108 	GAUDI_QUEUE_ID_DMA_0_1,
109 	GAUDI_QUEUE_ID_DMA_0_2,
110 	GAUDI_QUEUE_ID_DMA_0_3,
111 	GAUDI_QUEUE_ID_DMA_1_0,
112 	GAUDI_QUEUE_ID_DMA_1_1,
113 	GAUDI_QUEUE_ID_DMA_1_2,
114 	GAUDI_QUEUE_ID_DMA_1_3
115 };
116 
117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
118 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
119 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
120 		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
121 		"gaudi cpu eq"
122 };
123 
124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
125 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
126 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
127 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
128 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
129 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
130 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
131 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
132 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
133 };
134 
135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
136 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
137 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
138 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
139 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
140 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
141 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
142 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
143 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
144 };
145 
146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
147 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
148 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
149 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
150 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
151 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
152 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
153 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
154 	[PACKET_FENCE]		= sizeof(struct packet_fence),
155 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
156 	[PACKET_NOP]		= sizeof(struct packet_nop),
157 	[PACKET_STOP]		= sizeof(struct packet_stop),
158 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
159 	[PACKET_WAIT]		= sizeof(struct packet_wait),
160 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
161 };
162 
163 static inline bool validate_packet_id(enum packet_id id)
164 {
165 	switch (id) {
166 	case PACKET_WREG_32:
167 	case PACKET_WREG_BULK:
168 	case PACKET_MSG_LONG:
169 	case PACKET_MSG_SHORT:
170 	case PACKET_CP_DMA:
171 	case PACKET_REPEAT:
172 	case PACKET_MSG_PROT:
173 	case PACKET_FENCE:
174 	case PACKET_LIN_DMA:
175 	case PACKET_NOP:
176 	case PACKET_STOP:
177 	case PACKET_ARB_POINT:
178 	case PACKET_WAIT:
179 	case PACKET_LOAD_AND_EXE:
180 		return true;
181 	default:
182 		return false;
183 	}
184 }
185 
186 static const char * const
187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
188 	"tpc_address_exceed_slm",
189 	"tpc_div_by_0",
190 	"tpc_spu_mac_overflow",
191 	"tpc_spu_addsub_overflow",
192 	"tpc_spu_abs_overflow",
193 	"tpc_spu_fp_dst_nan_inf",
194 	"tpc_spu_fp_dst_denorm",
195 	"tpc_vpu_mac_overflow",
196 	"tpc_vpu_addsub_overflow",
197 	"tpc_vpu_abs_overflow",
198 	"tpc_vpu_fp_dst_nan_inf",
199 	"tpc_vpu_fp_dst_denorm",
200 	"tpc_assertions",
201 	"tpc_illegal_instruction",
202 	"tpc_pc_wrap_around",
203 	"tpc_qm_sw_err",
204 	"tpc_hbw_rresp_err",
205 	"tpc_hbw_bresp_err",
206 	"tpc_lbw_rresp_err",
207 	"tpc_lbw_bresp_err"
208 };
209 
210 static const char * const
211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
212 	"PQ AXI HBW error",
213 	"CQ AXI HBW error",
214 	"CP AXI HBW error",
215 	"CP error due to undefined OPCODE",
216 	"CP encountered STOP OPCODE",
217 	"CP AXI LBW error",
218 	"CP WRREG32 or WRBULK returned error",
219 	"N/A",
220 	"FENCE 0 inc over max value and clipped",
221 	"FENCE 1 inc over max value and clipped",
222 	"FENCE 2 inc over max value and clipped",
223 	"FENCE 3 inc over max value and clipped",
224 	"FENCE 0 dec under min value and clipped",
225 	"FENCE 1 dec under min value and clipped",
226 	"FENCE 2 dec under min value and clipped",
227 	"FENCE 3 dec under min value and clipped"
228 };
229 
230 static const char * const
231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
232 	"Choice push while full error",
233 	"Choice Q watchdog error",
234 	"MSG AXI LBW returned with error"
235 };
236 
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352 
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354 	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355 	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356 	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357 	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358 	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359 	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360 	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361 	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362 	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363 	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364 	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365 	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366 	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367 	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368 	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369 	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370 	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371 	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372 	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373 	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374 	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375 	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376 	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377 	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378 	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379 	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380 	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382 
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384 	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385 	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
386 	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387 	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388 	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389 	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390 	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391 	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392 	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393 	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394 	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396 
397 static s64 gaudi_state_dump_specs_props[] = {
398 	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399 	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400 	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401 	[SP_MON_OBJ_WR_ADDR_LOW] =
402 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403 	[SP_MON_OBJ_WR_ADDR_HIGH] =
404 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405 	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406 	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407 	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408 	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409 	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410 	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411 	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412 	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413 	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414 	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415 	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416 	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417 	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418 	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419 	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420 	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421 	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422 	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423 	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424 	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425 	[SP_FENCE0_CNT_OFFSET] =
426 		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427 	[SP_FENCE0_RDATA_OFFSET] =
428 		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429 	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430 	[SP_NUM_CORES] = 1,
431 };
432 
433 static const int gaudi_queue_id_to_engine_id[] = {
434 	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
435 	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
436 	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
437 	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
438 	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
439 	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
440 	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
441 	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
442 	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
443 	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
444 	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
445 	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
446 	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
447 	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
448 	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
449 	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
450 	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
451 	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
452 	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
453 	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
454 	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
455 	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
456 	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
457 	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
458 	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
459 	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
460 	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
461 	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
462 	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
463 };
464 
465 /* The order here is opposite to the order of the indexing in the h/w.
466  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
467  */
468 static const char * const gaudi_sync_manager_names[] = {
469 	"SYNC_MGR_E_N",
470 	"SYNC_MGR_W_N",
471 	"SYNC_MGR_E_S",
472 	"SYNC_MGR_W_S",
473 	NULL
474 };
475 
476 struct ecc_info_extract_params {
477 	u64 block_address;
478 	u32 num_memories;
479 	bool derr;
480 };
481 
482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
483 								u64 phys_addr);
484 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
485 					struct hl_cs_job *job);
486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
487 					u32 size, u64 val);
488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
489 					u32 num_regs, u32 val);
490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
491 				u32 tpc_id);
492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
493 static int gaudi_cpucp_info_get(struct hl_device *hdev);
494 static void gaudi_disable_clock_gating(struct hl_device *hdev);
495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
497 				u32 size, bool eb);
498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
499 				struct hl_gen_wait_properties *prop);
500 static inline enum hl_collective_mode
501 get_collective_mode(struct hl_device *hdev, u32 queue_id)
502 {
503 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
504 		return HL_COLLECTIVE_MASTER;
505 
506 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
507 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
508 		return HL_COLLECTIVE_SLAVE;
509 
510 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
511 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
512 		return HL_COLLECTIVE_SLAVE;
513 
514 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
515 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
516 		return HL_COLLECTIVE_SLAVE;
517 
518 	return HL_COLLECTIVE_NOT_SUPPORTED;
519 }
520 
521 static inline void set_default_power_values(struct hl_device *hdev)
522 {
523 	struct asic_fixed_properties *prop = &hdev->asic_prop;
524 
525 	if (hdev->card_type == cpucp_card_type_pmc) {
526 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
527 
528 		if (prop->fw_security_enabled)
529 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
530 		else
531 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
532 	} else {
533 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
534 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
535 	}
536 }
537 
538 static int gaudi_set_fixed_properties(struct hl_device *hdev)
539 {
540 	struct asic_fixed_properties *prop = &hdev->asic_prop;
541 	u32 num_sync_stream_queues = 0;
542 	int i;
543 
544 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
545 	prop->hw_queues_props = kcalloc(prop->max_queues,
546 			sizeof(struct hw_queue_properties),
547 			GFP_KERNEL);
548 
549 	if (!prop->hw_queues_props)
550 		return -ENOMEM;
551 
552 	for (i = 0 ; i < prop->max_queues ; i++) {
553 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
554 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
555 			prop->hw_queues_props[i].driver_only = 0;
556 			prop->hw_queues_props[i].supports_sync_stream = 1;
557 			prop->hw_queues_props[i].cb_alloc_flags =
558 				CB_ALLOC_KERNEL;
559 			num_sync_stream_queues++;
560 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
561 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
562 			prop->hw_queues_props[i].driver_only = 1;
563 			prop->hw_queues_props[i].supports_sync_stream = 0;
564 			prop->hw_queues_props[i].cb_alloc_flags =
565 				CB_ALLOC_KERNEL;
566 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
567 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
568 			prop->hw_queues_props[i].driver_only = 0;
569 			prop->hw_queues_props[i].supports_sync_stream = 0;
570 			prop->hw_queues_props[i].cb_alloc_flags =
571 				CB_ALLOC_USER;
572 
573 		}
574 		prop->hw_queues_props[i].collective_mode =
575 						get_collective_mode(hdev, i);
576 	}
577 
578 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
579 	prop->cfg_base_address = CFG_BASE;
580 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
581 	prop->host_base_address = HOST_PHYS_BASE;
582 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
583 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
584 	prop->completion_mode = HL_COMPLETION_MODE_JOB;
585 	prop->collective_first_sob = 0;
586 	prop->collective_first_mon = 0;
587 
588 	/* 2 SOBs per internal queue stream are reserved for collective */
589 	prop->sync_stream_first_sob =
590 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
591 			* QMAN_STREAMS * HL_RSVD_SOBS;
592 
593 	/* 1 monitor per internal queue stream are reserved for collective
594 	 * 2 monitors per external queue stream are reserved for collective
595 	 */
596 	prop->sync_stream_first_mon =
597 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
598 			(NUMBER_OF_EXT_HW_QUEUES * 2);
599 
600 	prop->dram_base_address = DRAM_PHYS_BASE;
601 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
602 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
603 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
604 
605 	prop->sram_base_address = SRAM_BASE_ADDR;
606 	prop->sram_size = SRAM_SIZE;
607 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
608 	prop->sram_user_base_address =
609 			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
610 
611 	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
612 	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
613 
614 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
615 	if (hdev->pldm)
616 		prop->mmu_pgt_size = 0x800000; /* 8MB */
617 	else
618 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
619 	prop->mmu_pte_size = HL_PTE_SIZE;
620 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
621 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
622 	prop->dram_page_size = PAGE_SIZE_2MB;
623 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
624 	prop->dram_supports_virtual_memory = false;
625 
626 	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
627 	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
628 	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
629 	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
630 	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
631 	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
632 	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
633 	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
634 	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
635 	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
636 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
637 	prop->pmmu.end_addr =
638 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
639 	prop->pmmu.page_size = PAGE_SIZE_4KB;
640 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
641 	prop->pmmu.last_mask = LAST_MASK;
642 	/* TODO: will be duplicated until implementing per-MMU props */
643 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
644 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
645 
646 	/* PMMU and HPMMU are the same except of page size */
647 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
648 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
649 
650 	/* shifts and masks are the same in PMMU and DMMU */
651 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
652 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
653 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
654 	prop->dmmu.page_size = PAGE_SIZE_2MB;
655 
656 	prop->cfg_size = CFG_SIZE;
657 	prop->max_asid = MAX_ASID;
658 	prop->num_of_events = GAUDI_EVENT_SIZE;
659 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
660 
661 	set_default_power_values(hdev);
662 
663 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
664 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
665 
666 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
667 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
668 
669 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
670 					CARD_NAME_MAX_LEN);
671 
672 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
673 
674 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
675 			prop->sync_stream_first_sob +
676 			(num_sync_stream_queues * HL_RSVD_SOBS);
677 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
678 			prop->sync_stream_first_mon +
679 			(num_sync_stream_queues * HL_RSVD_MONS);
680 
681 	prop->first_available_user_interrupt = USHRT_MAX;
682 	prop->tpc_interrupt_id = USHRT_MAX;
683 
684 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
685 		prop->first_available_cq[i] = USHRT_MAX;
686 
687 	prop->fw_cpu_boot_dev_sts0_valid = false;
688 	prop->fw_cpu_boot_dev_sts1_valid = false;
689 	prop->hard_reset_done_by_fw = false;
690 	prop->gic_interrupts_enable = true;
691 
692 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
693 
694 	prop->clk_pll_index = HL_GAUDI_MME_PLL;
695 	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
696 
697 	prop->use_get_power_for_reset_history = true;
698 
699 	prop->configurable_stop_on_err = true;
700 
701 	prop->set_max_power_on_device_init = true;
702 
703 	prop->dma_mask = 48;
704 
705 	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
706 
707 	return 0;
708 }
709 
710 static int gaudi_pci_bars_map(struct hl_device *hdev)
711 {
712 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
713 	bool is_wc[3] = {false, false, true};
714 	int rc;
715 
716 	rc = hl_pci_bars_map(hdev, name, is_wc);
717 	if (rc)
718 		return rc;
719 
720 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
721 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
722 
723 	return 0;
724 }
725 
726 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
727 {
728 	struct gaudi_device *gaudi = hdev->asic_specific;
729 	struct hl_inbound_pci_region pci_region;
730 	u64 old_addr = addr;
731 	int rc;
732 
733 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
734 		return old_addr;
735 
736 	if (hdev->asic_prop.iatu_done_by_fw)
737 		return U64_MAX;
738 
739 	/* Inbound Region 2 - Bar 4 - Point to HBM */
740 	pci_region.mode = PCI_BAR_MATCH_MODE;
741 	pci_region.bar = HBM_BAR_ID;
742 	pci_region.addr = addr;
743 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
744 	if (rc)
745 		return U64_MAX;
746 
747 	if (gaudi) {
748 		old_addr = gaudi->hbm_bar_cur_addr;
749 		gaudi->hbm_bar_cur_addr = addr;
750 	}
751 
752 	return old_addr;
753 }
754 
755 static int gaudi_init_iatu(struct hl_device *hdev)
756 {
757 	struct hl_inbound_pci_region inbound_region;
758 	struct hl_outbound_pci_region outbound_region;
759 	int rc;
760 
761 	if (hdev->asic_prop.iatu_done_by_fw)
762 		return 0;
763 
764 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
765 	inbound_region.mode = PCI_BAR_MATCH_MODE;
766 	inbound_region.bar = SRAM_BAR_ID;
767 	inbound_region.addr = SRAM_BASE_ADDR;
768 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
769 	if (rc)
770 		goto done;
771 
772 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
773 	inbound_region.mode = PCI_BAR_MATCH_MODE;
774 	inbound_region.bar = CFG_BAR_ID;
775 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
776 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
777 	if (rc)
778 		goto done;
779 
780 	/* Inbound Region 2 - Bar 4 - Point to HBM */
781 	inbound_region.mode = PCI_BAR_MATCH_MODE;
782 	inbound_region.bar = HBM_BAR_ID;
783 	inbound_region.addr = DRAM_PHYS_BASE;
784 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
785 	if (rc)
786 		goto done;
787 
788 	/* Outbound Region 0 - Point to Host */
789 	outbound_region.addr = HOST_PHYS_BASE;
790 	outbound_region.size = HOST_PHYS_SIZE;
791 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
792 
793 done:
794 	return rc;
795 }
796 
797 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
798 {
799 	return RREG32(mmHW_STATE);
800 }
801 
802 static int gaudi_early_init(struct hl_device *hdev)
803 {
804 	struct asic_fixed_properties *prop = &hdev->asic_prop;
805 	struct pci_dev *pdev = hdev->pdev;
806 	resource_size_t pci_bar_size;
807 	u32 fw_boot_status;
808 	int rc;
809 
810 	rc = gaudi_set_fixed_properties(hdev);
811 	if (rc) {
812 		dev_err(hdev->dev, "Failed setting fixed properties\n");
813 		return rc;
814 	}
815 
816 	/* Check BAR sizes */
817 	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
818 
819 	if (pci_bar_size != SRAM_BAR_SIZE) {
820 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
821 			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
822 		rc = -ENODEV;
823 		goto free_queue_props;
824 	}
825 
826 	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
827 
828 	if (pci_bar_size != CFG_BAR_SIZE) {
829 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
830 			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
831 		rc = -ENODEV;
832 		goto free_queue_props;
833 	}
834 
835 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
836 	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
837 
838 	/* If FW security is enabled at this point it means no access to ELBI */
839 	if (hdev->asic_prop.fw_security_enabled) {
840 		hdev->asic_prop.iatu_done_by_fw = true;
841 
842 		/*
843 		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
844 		 * decision can only be taken based on PCI ID security.
845 		 */
846 		hdev->asic_prop.gic_interrupts_enable = false;
847 		goto pci_init;
848 	}
849 
850 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
851 				&fw_boot_status);
852 	if (rc)
853 		goto free_queue_props;
854 
855 	/* Check whether FW is configuring iATU */
856 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
857 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
858 		hdev->asic_prop.iatu_done_by_fw = true;
859 
860 pci_init:
861 	rc = hl_pci_init(hdev);
862 	if (rc)
863 		goto free_queue_props;
864 
865 	/* Before continuing in the initialization, we need to read the preboot
866 	 * version to determine whether we run with a security-enabled firmware
867 	 */
868 	rc = hl_fw_read_preboot_status(hdev);
869 	if (rc) {
870 		if (hdev->reset_on_preboot_fail)
871 			/* we are already on failure flow, so don't check if hw_fini fails. */
872 			hdev->asic_funcs->hw_fini(hdev, true, false);
873 		goto pci_fini;
874 	}
875 
876 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
877 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
878 		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
879 		if (rc) {
880 			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
881 			goto pci_fini;
882 		}
883 	}
884 
885 	return 0;
886 
887 pci_fini:
888 	hl_pci_fini(hdev);
889 free_queue_props:
890 	kfree(hdev->asic_prop.hw_queues_props);
891 	return rc;
892 }
893 
894 static int gaudi_early_fini(struct hl_device *hdev)
895 {
896 	kfree(hdev->asic_prop.hw_queues_props);
897 	hl_pci_fini(hdev);
898 
899 	return 0;
900 }
901 
902 /**
903  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
904  *
905  * @hdev: pointer to hl_device structure
906  *
907  */
908 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
909 {
910 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
911 	struct asic_fixed_properties *prop = &hdev->asic_prop;
912 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
913 	int rc;
914 
915 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
916 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
917 		struct gaudi_device *gaudi = hdev->asic_specific;
918 
919 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
920 			return 0;
921 
922 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
923 
924 		if (rc)
925 			return rc;
926 
927 		freq = pll_freq_arr[2];
928 	} else {
929 		/* Backward compatibility */
930 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
931 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
932 		nr = RREG32(mmPSOC_CPU_PLL_NR);
933 		nf = RREG32(mmPSOC_CPU_PLL_NF);
934 		od = RREG32(mmPSOC_CPU_PLL_OD);
935 
936 		if (div_sel == DIV_SEL_REF_CLK ||
937 				div_sel == DIV_SEL_DIVIDED_REF) {
938 			if (div_sel == DIV_SEL_REF_CLK)
939 				freq = PLL_REF_CLK;
940 			else
941 				freq = PLL_REF_CLK / (div_fctr + 1);
942 		} else if (div_sel == DIV_SEL_PLL_CLK ||
943 			div_sel == DIV_SEL_DIVIDED_PLL) {
944 			pll_clk = PLL_REF_CLK * (nf + 1) /
945 					((nr + 1) * (od + 1));
946 			if (div_sel == DIV_SEL_PLL_CLK)
947 				freq = pll_clk;
948 			else
949 				freq = pll_clk / (div_fctr + 1);
950 		} else {
951 			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
952 			freq = 0;
953 		}
954 	}
955 
956 	prop->psoc_timestamp_frequency = freq;
957 	prop->psoc_pci_pll_nr = nr;
958 	prop->psoc_pci_pll_nf = nf;
959 	prop->psoc_pci_pll_od = od;
960 	prop->psoc_pci_pll_div_factor = div_fctr;
961 
962 	return 0;
963 }
964 
965 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
966 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
967 {
968 	struct asic_fixed_properties *prop = &hdev->asic_prop;
969 	struct packet_lin_dma *init_tpc_mem_pkt;
970 	struct hl_cs_job *job;
971 	struct hl_cb *cb;
972 	u64 dst_addr;
973 	u32 cb_size, ctl;
974 	u8 tpc_id;
975 	int rc;
976 
977 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
978 	if (!cb)
979 		return -EFAULT;
980 
981 	init_tpc_mem_pkt = cb->kernel_address;
982 	cb_size = sizeof(*init_tpc_mem_pkt);
983 	memset(init_tpc_mem_pkt, 0, cb_size);
984 
985 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
986 
987 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
988 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
989 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
990 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
991 
992 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
993 
994 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
995 
996 	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
997 	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
998 				round_up(prop->sram_user_base_address, SZ_8K));
999 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
1000 
1001 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
1002 	if (!job) {
1003 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1004 		rc = -ENOMEM;
1005 		goto release_cb;
1006 	}
1007 
1008 	job->id = 0;
1009 	job->user_cb = cb;
1010 	atomic_inc(&job->user_cb->cs_cnt);
1011 	job->user_cb_size = cb_size;
1012 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1013 	job->patched_cb = job->user_cb;
1014 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1015 
1016 	hl_debugfs_add_job(hdev, job);
1017 
1018 	rc = gaudi_send_job_on_qman0(hdev, job);
1019 
1020 	if (rc)
1021 		goto free_job;
1022 
1023 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1024 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1025 		if (rc)
1026 			break;
1027 	}
1028 
1029 free_job:
1030 	hl_userptr_delete_list(hdev, &job->userptr_list);
1031 	hl_debugfs_remove_job(hdev, job);
1032 	kfree(job);
1033 	atomic_dec(&cb->cs_cnt);
1034 
1035 release_cb:
1036 	hl_cb_put(cb);
1037 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1038 
1039 	return rc;
1040 }
1041 
1042 /*
1043  * gaudi_init_tpc_mem() - Initialize TPC memories.
1044  * @hdev: Pointer to hl_device structure.
1045  *
1046  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1047  *
1048  * Return: 0 for success, negative value for error.
1049  */
1050 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1051 {
1052 	const struct firmware *fw;
1053 	size_t fw_size;
1054 	void *cpu_addr;
1055 	dma_addr_t dma_handle;
1056 	int rc, count = 5;
1057 
1058 again:
1059 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1060 	if (rc == -EINTR && count-- > 0) {
1061 		msleep(50);
1062 		goto again;
1063 	}
1064 
1065 	if (rc) {
1066 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1067 				GAUDI_TPC_FW_FILE);
1068 		goto out;
1069 	}
1070 
1071 	fw_size = fw->size;
1072 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1073 	if (!cpu_addr) {
1074 		dev_err(hdev->dev,
1075 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1076 			fw_size);
1077 		rc = -ENOMEM;
1078 		goto out;
1079 	}
1080 
1081 	memcpy(cpu_addr, fw->data, fw_size);
1082 
1083 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1084 
1085 	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1086 
1087 out:
1088 	release_firmware(fw);
1089 	return rc;
1090 }
1091 
1092 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1093 {
1094 	struct gaudi_device *gaudi = hdev->asic_specific;
1095 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1096 	struct hl_hw_queue *q;
1097 	u32 i, sob_id, sob_group_id, queue_id;
1098 
1099 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1100 	sob_group_id =
1101 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1102 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1103 
1104 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1105 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1106 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1107 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1108 	}
1109 
1110 	/* Both DMA5 and TPC7 use the same resources since only a single
1111 	 * engine need to participate in the reduction process
1112 	 */
1113 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1114 	q = &hdev->kernel_queues[queue_id];
1115 	q->sync_stream_prop.collective_sob_id =
1116 			sob_id + NIC_NUMBER_OF_ENGINES;
1117 
1118 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1119 	q = &hdev->kernel_queues[queue_id];
1120 	q->sync_stream_prop.collective_sob_id =
1121 			sob_id + NIC_NUMBER_OF_ENGINES;
1122 }
1123 
1124 static void gaudi_sob_group_hw_reset(struct kref *ref)
1125 {
1126 	struct gaudi_hw_sob_group *hw_sob_group =
1127 		container_of(ref, struct gaudi_hw_sob_group, kref);
1128 	struct hl_device *hdev = hw_sob_group->hdev;
1129 	int i;
1130 
1131 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1132 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1133 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1134 
1135 	kref_init(&hw_sob_group->kref);
1136 }
1137 
1138 static void gaudi_sob_group_reset_error(struct kref *ref)
1139 {
1140 	struct gaudi_hw_sob_group *hw_sob_group =
1141 		container_of(ref, struct gaudi_hw_sob_group, kref);
1142 	struct hl_device *hdev = hw_sob_group->hdev;
1143 
1144 	dev_crit(hdev->dev,
1145 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1146 		hw_sob_group->base_sob_id);
1147 }
1148 
1149 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1150 {
1151 	struct gaudi_collective_properties *prop;
1152 	int i;
1153 
1154 	prop = &gaudi->collective_props;
1155 
1156 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1157 
1158 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1159 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1160 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1161 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1162 	/* Set collective engine bit */
1163 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1164 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1165 }
1166 
1167 static int gaudi_collective_init(struct hl_device *hdev)
1168 {
1169 	u32 i, sob_id, reserved_sobs_per_group;
1170 	struct gaudi_collective_properties *prop;
1171 	struct gaudi_device *gaudi;
1172 
1173 	gaudi = hdev->asic_specific;
1174 	prop = &gaudi->collective_props;
1175 	sob_id = hdev->asic_prop.collective_first_sob;
1176 
1177 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1178 	reserved_sobs_per_group =
1179 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1180 
1181 	/* Init SOB groups */
1182 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1183 		prop->hw_sob_group[i].hdev = hdev;
1184 		prop->hw_sob_group[i].base_sob_id = sob_id;
1185 		sob_id += reserved_sobs_per_group;
1186 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1187 	}
1188 
1189 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1190 		prop->next_sob_group_val[i] = 1;
1191 		prop->curr_sob_group_idx[i] = 0;
1192 		gaudi_collective_map_sobs(hdev, i);
1193 	}
1194 
1195 	gaudi_collective_mstr_sob_mask_set(gaudi);
1196 
1197 	return 0;
1198 }
1199 
1200 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1201 {
1202 	struct gaudi_device *gaudi = hdev->asic_specific;
1203 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1204 
1205 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1206 					gaudi_sob_group_hw_reset);
1207 }
1208 
1209 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1210 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1211 {
1212 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1213 	struct gaudi_collective_properties *cprop;
1214 	struct hl_gen_wait_properties wait_prop;
1215 	struct hl_sync_stream_properties *prop;
1216 	struct gaudi_device *gaudi;
1217 
1218 	gaudi = hdev->asic_specific;
1219 	cprop = &gaudi->collective_props;
1220 	queue_id = job->hw_queue_id;
1221 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1222 
1223 	master_sob_base =
1224 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1225 	master_monitor = prop->collective_mstr_mon_id[0];
1226 
1227 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1228 
1229 	dev_dbg(hdev->dev,
1230 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1231 		master_sob_base, cprop->mstr_sob_mask[0],
1232 		cprop->next_sob_group_val[stream],
1233 		master_monitor, queue_id);
1234 
1235 	wait_prop.data = (void *) job->patched_cb;
1236 	wait_prop.sob_base = master_sob_base;
1237 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1238 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1239 	wait_prop.mon_id = master_monitor;
1240 	wait_prop.q_idx = queue_id;
1241 	wait_prop.size = cb_size;
1242 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1243 
1244 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1245 	master_monitor = prop->collective_mstr_mon_id[1];
1246 
1247 	dev_dbg(hdev->dev,
1248 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1249 		master_sob_base, cprop->mstr_sob_mask[1],
1250 		cprop->next_sob_group_val[stream],
1251 		master_monitor, queue_id);
1252 
1253 	wait_prop.sob_base = master_sob_base;
1254 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1255 	wait_prop.mon_id = master_monitor;
1256 	wait_prop.size = cb_size;
1257 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1258 }
1259 
1260 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1261 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1262 {
1263 	struct hl_gen_wait_properties wait_prop;
1264 	struct hl_sync_stream_properties *prop;
1265 	u32 queue_id, cb_size = 0;
1266 
1267 	queue_id = job->hw_queue_id;
1268 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1269 
1270 	if (job->cs->encaps_signals) {
1271 		/* use the encaps signal handle store earlier in the flow
1272 		 * and set the SOB information from the encaps
1273 		 * signals handle
1274 		 */
1275 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1276 						cs_cmpl);
1277 
1278 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1279 				job->cs->sequence,
1280 				cs_cmpl->hw_sob->sob_id,
1281 				cs_cmpl->sob_val);
1282 	}
1283 
1284 	/* Add to wait CBs using slave monitor */
1285 	wait_prop.data = (void *) job->user_cb;
1286 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1287 	wait_prop.sob_mask = 0x1;
1288 	wait_prop.sob_val = cs_cmpl->sob_val;
1289 	wait_prop.mon_id = prop->collective_slave_mon_id;
1290 	wait_prop.q_idx = queue_id;
1291 	wait_prop.size = cb_size;
1292 
1293 	dev_dbg(hdev->dev,
1294 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1295 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1296 		prop->collective_slave_mon_id, queue_id);
1297 
1298 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1299 
1300 	dev_dbg(hdev->dev,
1301 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1302 		prop->collective_sob_id, queue_id);
1303 
1304 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1305 			prop->collective_sob_id, cb_size, false);
1306 }
1307 
1308 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1309 {
1310 	struct hl_cs_compl *signal_cs_cmpl =
1311 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1312 	struct hl_cs_compl *cs_cmpl =
1313 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1314 	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1315 	struct gaudi_collective_properties *cprop;
1316 	u32 stream, queue_id, sob_group_offset;
1317 	struct gaudi_device *gaudi;
1318 	struct hl_device *hdev;
1319 	struct hl_cs_job *job;
1320 	struct hl_ctx *ctx;
1321 
1322 	ctx = cs->ctx;
1323 	hdev = ctx->hdev;
1324 	gaudi = hdev->asic_specific;
1325 	cprop = &gaudi->collective_props;
1326 
1327 	if (cs->encaps_signals) {
1328 		cs_cmpl->hw_sob = handle->hw_sob;
1329 		/* at this checkpoint we only need the hw_sob pointer
1330 		 * for the completion check before start going over the jobs
1331 		 * of the master/slaves, the sob_value will be taken later on
1332 		 * in gaudi_collective_slave_init_job depends on each
1333 		 * job wait offset value.
1334 		 */
1335 		cs_cmpl->sob_val = 0;
1336 	} else {
1337 		/* copy the SOB id and value of the signal CS */
1338 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1339 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1340 	}
1341 
1342 	/* check again if the signal cs already completed.
1343 	 * if yes then don't send any wait cs since the hw_sob
1344 	 * could be in reset already. if signal is not completed
1345 	 * then get refcount to hw_sob to prevent resetting the sob
1346 	 * while wait cs is not submitted.
1347 	 * note that this check is protected by two locks,
1348 	 * hw queue lock and completion object lock,
1349 	 * and the same completion object lock also protects
1350 	 * the hw_sob reset handler function.
1351 	 * The hw_queue lock prevent out of sync of hw_sob
1352 	 * refcount value, changed by signal/wait flows.
1353 	 */
1354 	spin_lock(&signal_cs_cmpl->lock);
1355 
1356 	if (completion_done(&cs->signal_fence->completion)) {
1357 		spin_unlock(&signal_cs_cmpl->lock);
1358 		return -EINVAL;
1359 	}
1360 	/* Increment kref since all slave queues are now waiting on it */
1361 	kref_get(&cs_cmpl->hw_sob->kref);
1362 
1363 	spin_unlock(&signal_cs_cmpl->lock);
1364 
1365 	/* Calculate the stream from collective master queue (1st job) */
1366 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1367 	stream = job->hw_queue_id % 4;
1368 	sob_group_offset =
1369 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1370 
1371 	list_for_each_entry(job, &cs->job_list, cs_node) {
1372 		queue_id = job->hw_queue_id;
1373 
1374 		if (hdev->kernel_queues[queue_id].collective_mode ==
1375 				HL_COLLECTIVE_MASTER)
1376 			gaudi_collective_master_init_job(hdev, job, stream,
1377 						sob_group_offset);
1378 		else
1379 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1380 	}
1381 
1382 	cs_cmpl->sob_group = sob_group_offset;
1383 
1384 	/* Handle sob group kref and wraparound */
1385 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1386 	cprop->next_sob_group_val[stream]++;
1387 
1388 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1389 		/*
1390 		 * Decrement as we reached the max value.
1391 		 * The release function won't be called here as we've
1392 		 * just incremented the refcount.
1393 		 */
1394 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1395 				gaudi_sob_group_reset_error);
1396 		cprop->next_sob_group_val[stream] = 1;
1397 		/* only two SOBs are currently in use */
1398 		cprop->curr_sob_group_idx[stream] =
1399 			(cprop->curr_sob_group_idx[stream] + 1) &
1400 							(HL_RSVD_SOBS - 1);
1401 
1402 		gaudi_collective_map_sobs(hdev, stream);
1403 
1404 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1405 				cprop->curr_sob_group_idx[stream], stream);
1406 	}
1407 
1408 	mb();
1409 	hl_fence_put(cs->signal_fence);
1410 	cs->signal_fence = NULL;
1411 
1412 	return 0;
1413 }
1414 
1415 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1416 {
1417 	u32 cacheline_end, additional_commands;
1418 
1419 	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1420 	additional_commands = sizeof(struct packet_msg_prot) * 2;
1421 
1422 	if (user_cb_size + additional_commands > cacheline_end)
1423 		return cacheline_end - user_cb_size + additional_commands;
1424 	else
1425 		return additional_commands;
1426 }
1427 
1428 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1429 		struct hl_ctx *ctx, struct hl_cs *cs,
1430 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1431 		u32 encaps_signal_offset)
1432 {
1433 	struct hw_queue_properties *hw_queue_prop;
1434 	struct hl_cs_counters_atomic *cntr;
1435 	struct hl_cs_job *job;
1436 	struct hl_cb *cb;
1437 	u32 cb_size;
1438 	bool patched_cb;
1439 
1440 	cntr = &hdev->aggregated_cs_counters;
1441 
1442 	if (mode == HL_COLLECTIVE_MASTER) {
1443 		/* CB size of collective master queue contains
1444 		 * 4 msg short packets for monitor 1 configuration
1445 		 * 1 fence packet
1446 		 * 4 msg short packets for monitor 2 configuration
1447 		 * 1 fence packet
1448 		 * 2 msg prot packets for completion and MSI
1449 		 */
1450 		cb_size = sizeof(struct packet_msg_short) * 8 +
1451 				sizeof(struct packet_fence) * 2 +
1452 				sizeof(struct packet_msg_prot) * 2;
1453 		patched_cb = true;
1454 	} else {
1455 		/* CB size of collective slave queues contains
1456 		 * 4 msg short packets for monitor configuration
1457 		 * 1 fence packet
1458 		 * 1 additional msg short packet for sob signal
1459 		 */
1460 		cb_size = sizeof(struct packet_msg_short) * 5 +
1461 				sizeof(struct packet_fence);
1462 		patched_cb = false;
1463 	}
1464 
1465 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1466 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1467 	if (!job) {
1468 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1469 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1470 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1471 		return -ENOMEM;
1472 	}
1473 
1474 	/* Allocate internal mapped CB for non patched CBs */
1475 	cb = hl_cb_kernel_create(hdev, cb_size,
1476 			hdev->mmu_enable && !patched_cb);
1477 	if (!cb) {
1478 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1479 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1480 		kfree(job);
1481 		return -EFAULT;
1482 	}
1483 
1484 	job->id = 0;
1485 	job->cs = cs;
1486 	job->user_cb = cb;
1487 	atomic_inc(&job->user_cb->cs_cnt);
1488 	job->user_cb_size = cb_size;
1489 	job->hw_queue_id = queue_id;
1490 
1491 	/* since its guaranteed to have only one chunk in the collective wait
1492 	 * cs, we can use this chunk to set the encapsulated signal offset
1493 	 * in the jobs.
1494 	 */
1495 	if (cs->encaps_signals)
1496 		job->encaps_sig_wait_offset = encaps_signal_offset;
1497 
1498 	/*
1499 	 * No need in parsing, user CB is the patched CB.
1500 	 * We call hl_cb_destroy() out of two reasons - we don't need
1501 	 * the CB in the CB idr anymore and to decrement its refcount as
1502 	 * it was incremented inside hl_cb_kernel_create().
1503 	 */
1504 	if (patched_cb)
1505 		job->patched_cb = job->user_cb;
1506 	else
1507 		job->patched_cb = NULL;
1508 
1509 	job->job_cb_size = job->user_cb_size;
1510 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1511 
1512 	/* increment refcount as for external queues we get completion */
1513 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1514 		cs_get(cs);
1515 
1516 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1517 
1518 	list_add_tail(&job->cs_node, &cs->job_list);
1519 
1520 	hl_debugfs_add_job(hdev, job);
1521 
1522 	return 0;
1523 }
1524 
1525 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1526 		struct hl_ctx *ctx, struct hl_cs *cs,
1527 		u32 wait_queue_id, u32 collective_engine_id,
1528 		u32 encaps_signal_offset)
1529 {
1530 	struct gaudi_device *gaudi = hdev->asic_specific;
1531 	struct hw_queue_properties *hw_queue_prop;
1532 	u32 queue_id, collective_queue, num_jobs;
1533 	u32 stream, nic_queue, nic_idx = 0;
1534 	bool skip;
1535 	int i, rc = 0;
1536 
1537 	/* Verify wait queue id is configured as master */
1538 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1539 	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1540 		dev_err(hdev->dev,
1541 			"Queue %d is not configured as collective master\n",
1542 			wait_queue_id);
1543 		return -EINVAL;
1544 	}
1545 
1546 	/* Verify engine id is supported */
1547 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1548 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1549 		dev_err(hdev->dev,
1550 			"Collective wait does not support engine %u\n",
1551 			collective_engine_id);
1552 		return -EINVAL;
1553 	}
1554 
1555 	stream = wait_queue_id % 4;
1556 
1557 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1558 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1559 	else
1560 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1561 
1562 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1563 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1564 
1565 	/* First job goes to the collective master queue, it will wait for
1566 	 * the collective slave queues to finish execution.
1567 	 * The synchronization is done using two monitors:
1568 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1569 	 * reduction engine (DMA5/TPC7).
1570 	 *
1571 	 * Rest of the jobs goes to the collective slave queues which will
1572 	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1573 	 */
1574 	for (i = 0 ; i < num_jobs ; i++) {
1575 		if (i == 0) {
1576 			queue_id = wait_queue_id;
1577 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1578 				HL_COLLECTIVE_MASTER, queue_id,
1579 				wait_queue_id, encaps_signal_offset);
1580 		} else {
1581 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1582 				if (gaudi->hw_cap_initialized &
1583 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1584 					skip = false;
1585 				else
1586 					skip = true;
1587 
1588 				queue_id = nic_queue;
1589 				nic_queue += 4;
1590 				nic_idx++;
1591 
1592 				if (skip)
1593 					continue;
1594 			} else {
1595 				queue_id = collective_queue;
1596 			}
1597 
1598 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1599 				HL_COLLECTIVE_SLAVE, queue_id,
1600 				wait_queue_id, encaps_signal_offset);
1601 		}
1602 
1603 		if (rc)
1604 			return rc;
1605 	}
1606 
1607 	return rc;
1608 }
1609 
1610 static int gaudi_late_init(struct hl_device *hdev)
1611 {
1612 	struct gaudi_device *gaudi = hdev->asic_specific;
1613 	int rc;
1614 
1615 	rc = gaudi->cpucp_info_get(hdev);
1616 	if (rc) {
1617 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1618 		return rc;
1619 	}
1620 
1621 	if ((hdev->card_type == cpucp_card_type_pci) &&
1622 			(hdev->nic_ports_mask & 0x3)) {
1623 		dev_info(hdev->dev,
1624 			"PCI card detected, only 8 ports are enabled\n");
1625 		hdev->nic_ports_mask &= ~0x3;
1626 
1627 		/* Stop and disable unused NIC QMANs */
1628 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1629 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1630 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1631 
1632 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1633 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1634 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1635 
1636 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1637 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1638 
1639 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1640 	}
1641 
1642 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1643 	if (rc) {
1644 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1645 		return rc;
1646 	}
1647 
1648 	/* Scrub both SRAM and DRAM */
1649 	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1650 	if (rc)
1651 		goto disable_pci_access;
1652 
1653 	rc = gaudi_fetch_psoc_frequency(hdev);
1654 	if (rc) {
1655 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1656 		goto disable_pci_access;
1657 	}
1658 
1659 	rc = gaudi_mmu_clear_pgt_range(hdev);
1660 	if (rc) {
1661 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1662 		goto disable_pci_access;
1663 	}
1664 
1665 	rc = gaudi_init_tpc_mem(hdev);
1666 	if (rc) {
1667 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1668 		goto disable_pci_access;
1669 	}
1670 
1671 	rc = gaudi_collective_init(hdev);
1672 	if (rc) {
1673 		dev_err(hdev->dev, "Failed to init collective\n");
1674 		goto disable_pci_access;
1675 	}
1676 
1677 	/* We only support a single ASID for the user, so for the sake of optimization, just
1678 	 * initialize the ASID one time during device initialization with the fixed value of 1
1679 	 */
1680 	gaudi_mmu_prepare(hdev, 1);
1681 
1682 	hl_fw_set_pll_profile(hdev);
1683 
1684 	return 0;
1685 
1686 disable_pci_access:
1687 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1688 
1689 	return rc;
1690 }
1691 
1692 static void gaudi_late_fini(struct hl_device *hdev)
1693 {
1694 	hl_hwmon_release_resources(hdev);
1695 }
1696 
1697 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1698 {
1699 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1700 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1701 	int i, j, rc = 0;
1702 
1703 	/*
1704 	 * The device CPU works with 40-bits addresses, while bit 39 must be set
1705 	 * to '1' when accessing the host.
1706 	 * Bits 49:39 of the full host address are saved for a later
1707 	 * configuration of the HW to perform extension to 50 bits.
1708 	 * Because there is a single HW register that holds the extension bits,
1709 	 * these bits must be identical in all allocated range.
1710 	 */
1711 
1712 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1713 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1714 								&dma_addr_arr[i],
1715 								GFP_KERNEL | __GFP_ZERO);
1716 		if (!virt_addr_arr[i]) {
1717 			rc = -ENOMEM;
1718 			goto free_dma_mem_arr;
1719 		}
1720 
1721 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1722 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1723 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1724 			break;
1725 	}
1726 
1727 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1728 		dev_err(hdev->dev,
1729 			"MSB of CPU accessible DMA memory are not identical in all range\n");
1730 		rc = -EFAULT;
1731 		goto free_dma_mem_arr;
1732 	}
1733 
1734 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1735 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1736 	hdev->cpu_pci_msb_addr =
1737 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1738 
1739 	if (!hdev->asic_prop.fw_security_enabled)
1740 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1741 
1742 free_dma_mem_arr:
1743 	for (j = 0 ; j < i ; j++)
1744 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1745 						dma_addr_arr[j]);
1746 
1747 	return rc;
1748 }
1749 
1750 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1751 {
1752 	struct gaudi_device *gaudi = hdev->asic_specific;
1753 	struct gaudi_internal_qman_info *q;
1754 	u32 i;
1755 
1756 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1757 		q = &gaudi->internal_qmans[i];
1758 		if (!q->pq_kernel_addr)
1759 			continue;
1760 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1761 	}
1762 }
1763 
1764 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1765 {
1766 	struct gaudi_device *gaudi = hdev->asic_specific;
1767 	struct gaudi_internal_qman_info *q;
1768 	int rc, i;
1769 
1770 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1771 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1772 			continue;
1773 
1774 		q = &gaudi->internal_qmans[i];
1775 
1776 		switch (i) {
1777 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1778 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1779 			break;
1780 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1781 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1782 			break;
1783 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1784 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1785 			break;
1786 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1787 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1788 			break;
1789 		default:
1790 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1791 			rc = -EINVAL;
1792 			goto free_internal_qmans_pq_mem;
1793 		}
1794 
1795 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1796 								GFP_KERNEL | __GFP_ZERO);
1797 		if (!q->pq_kernel_addr) {
1798 			rc = -ENOMEM;
1799 			goto free_internal_qmans_pq_mem;
1800 		}
1801 	}
1802 
1803 	return 0;
1804 
1805 free_internal_qmans_pq_mem:
1806 	gaudi_free_internal_qmans_pq_mem(hdev);
1807 	return rc;
1808 }
1809 
1810 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1811 {
1812 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1813 	struct pci_mem_region *region;
1814 
1815 	/* CFG */
1816 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1817 	region->region_base = CFG_BASE;
1818 	region->region_size = CFG_SIZE;
1819 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1820 	region->bar_size = CFG_BAR_SIZE;
1821 	region->bar_id = CFG_BAR_ID;
1822 	region->used = 1;
1823 
1824 	/* SRAM */
1825 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1826 	region->region_base = SRAM_BASE_ADDR;
1827 	region->region_size = SRAM_SIZE;
1828 	region->offset_in_bar = 0;
1829 	region->bar_size = SRAM_BAR_SIZE;
1830 	region->bar_id = SRAM_BAR_ID;
1831 	region->used = 1;
1832 
1833 	/* DRAM */
1834 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1835 	region->region_base = DRAM_PHYS_BASE;
1836 	region->region_size = hdev->asic_prop.dram_size;
1837 	region->offset_in_bar = 0;
1838 	region->bar_size = prop->dram_pci_bar_size;
1839 	region->bar_id = HBM_BAR_ID;
1840 	region->used = 1;
1841 
1842 	/* SP SRAM */
1843 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1844 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1845 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1846 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1847 	region->bar_size = CFG_BAR_SIZE;
1848 	region->bar_id = CFG_BAR_ID;
1849 	region->used = 1;
1850 }
1851 
1852 static int gaudi_sw_init(struct hl_device *hdev)
1853 {
1854 	struct gaudi_device *gaudi;
1855 	u32 i, event_id = 0;
1856 	int rc;
1857 
1858 	/* Allocate device structure */
1859 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1860 	if (!gaudi)
1861 		return -ENOMEM;
1862 
1863 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1864 		if (gaudi_irq_map_table[i].valid) {
1865 			if (event_id == GAUDI_EVENT_SIZE) {
1866 				dev_err(hdev->dev,
1867 					"Event array exceeds the limit of %u events\n",
1868 					GAUDI_EVENT_SIZE);
1869 				rc = -EINVAL;
1870 				goto free_gaudi_device;
1871 			}
1872 
1873 			gaudi->events[event_id++] =
1874 					gaudi_irq_map_table[i].fc_id;
1875 		}
1876 	}
1877 
1878 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1879 
1880 	hdev->asic_specific = gaudi;
1881 
1882 	/* Create DMA pool for small allocations */
1883 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1884 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1885 	if (!hdev->dma_pool) {
1886 		dev_err(hdev->dev, "failed to create DMA pool\n");
1887 		rc = -ENOMEM;
1888 		goto free_gaudi_device;
1889 	}
1890 
1891 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1892 	if (rc)
1893 		goto free_dma_pool;
1894 
1895 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1896 	if (!hdev->cpu_accessible_dma_pool) {
1897 		dev_err(hdev->dev,
1898 			"Failed to create CPU accessible DMA pool\n");
1899 		rc = -ENOMEM;
1900 		goto free_cpu_dma_mem;
1901 	}
1902 
1903 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1904 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1905 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1906 	if (rc) {
1907 		dev_err(hdev->dev,
1908 			"Failed to add memory to CPU accessible DMA pool\n");
1909 		rc = -EFAULT;
1910 		goto free_cpu_accessible_dma_pool;
1911 	}
1912 
1913 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1914 	if (rc)
1915 		goto free_cpu_accessible_dma_pool;
1916 
1917 	spin_lock_init(&gaudi->hw_queues_lock);
1918 
1919 	hdev->supports_sync_stream = true;
1920 	hdev->supports_coresight = true;
1921 	hdev->supports_staged_submission = true;
1922 	hdev->supports_wait_for_multi_cs = true;
1923 
1924 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1925 	hdev->stream_master_qid_arr =
1926 				hdev->asic_funcs->get_stream_master_qid_arr();
1927 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1928 
1929 	return 0;
1930 
1931 free_cpu_accessible_dma_pool:
1932 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1933 free_cpu_dma_mem:
1934 	if (!hdev->asic_prop.fw_security_enabled)
1935 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1936 					hdev->cpu_pci_msb_addr);
1937 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1938 					hdev->cpu_accessible_dma_address);
1939 free_dma_pool:
1940 	dma_pool_destroy(hdev->dma_pool);
1941 free_gaudi_device:
1942 	kfree(gaudi);
1943 	return rc;
1944 }
1945 
1946 static int gaudi_sw_fini(struct hl_device *hdev)
1947 {
1948 	struct gaudi_device *gaudi = hdev->asic_specific;
1949 
1950 	gaudi_free_internal_qmans_pq_mem(hdev);
1951 
1952 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1953 
1954 	if (!hdev->asic_prop.fw_security_enabled)
1955 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1956 					hdev->cpu_pci_msb_addr);
1957 
1958 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1959 					hdev->cpu_accessible_dma_address);
1960 
1961 	dma_pool_destroy(hdev->dma_pool);
1962 
1963 	kfree(gaudi);
1964 
1965 	return 0;
1966 }
1967 
1968 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1969 {
1970 	struct hl_device *hdev = arg;
1971 	int i;
1972 
1973 	if (hdev->disabled)
1974 		return IRQ_HANDLED;
1975 
1976 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1977 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1978 
1979 	hl_irq_handler_eq(irq, &hdev->event_queue);
1980 
1981 	return IRQ_HANDLED;
1982 }
1983 
1984 /*
1985  * For backward compatibility, new MSI interrupts should be set after the
1986  * existing CPU and NIC interrupts.
1987  */
1988 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1989 				bool cpu_eq)
1990 {
1991 	int msi_vec;
1992 
1993 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1994 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1995 				GAUDI_EVENT_QUEUE_MSI_IDX);
1996 
1997 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1998 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1999 
2000 	return pci_irq_vector(hdev->pdev, msi_vec);
2001 }
2002 
2003 static int gaudi_enable_msi_single(struct hl_device *hdev)
2004 {
2005 	int rc, irq;
2006 
2007 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2008 
2009 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2010 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2011 			"gaudi single msi", hdev);
2012 	if (rc)
2013 		dev_err(hdev->dev,
2014 			"Failed to request single MSI IRQ\n");
2015 
2016 	return rc;
2017 }
2018 
2019 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2020 {
2021 	int cq_cnt = hdev->asic_prop.completion_queues_count;
2022 	int rc, i, irq_cnt_init, irq;
2023 
2024 	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2025 		irq = gaudi_pci_irq_vector(hdev, i, false);
2026 		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2027 				&hdev->completion_queue[i]);
2028 		if (rc) {
2029 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2030 			goto free_irqs;
2031 		}
2032 	}
2033 
2034 	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2035 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2036 				&hdev->event_queue);
2037 	if (rc) {
2038 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2039 		goto free_irqs;
2040 	}
2041 
2042 	return 0;
2043 
2044 free_irqs:
2045 	for (i = 0 ; i < irq_cnt_init ; i++)
2046 		free_irq(gaudi_pci_irq_vector(hdev, i, false),
2047 				&hdev->completion_queue[i]);
2048 	return rc;
2049 }
2050 
2051 static int gaudi_enable_msi(struct hl_device *hdev)
2052 {
2053 	struct gaudi_device *gaudi = hdev->asic_specific;
2054 	int rc;
2055 
2056 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2057 		return 0;
2058 
2059 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2060 	if (rc < 0) {
2061 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2062 		return rc;
2063 	}
2064 
2065 	if (rc < NUMBER_OF_INTERRUPTS) {
2066 		gaudi->multi_msi_mode = false;
2067 		rc = gaudi_enable_msi_single(hdev);
2068 	} else {
2069 		gaudi->multi_msi_mode = true;
2070 		rc = gaudi_enable_msi_multi(hdev);
2071 	}
2072 
2073 	if (rc)
2074 		goto free_pci_irq_vectors;
2075 
2076 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2077 
2078 	return 0;
2079 
2080 free_pci_irq_vectors:
2081 	pci_free_irq_vectors(hdev->pdev);
2082 	return rc;
2083 }
2084 
2085 static void gaudi_sync_irqs(struct hl_device *hdev)
2086 {
2087 	struct gaudi_device *gaudi = hdev->asic_specific;
2088 	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2089 
2090 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2091 		return;
2092 
2093 	/* Wait for all pending IRQs to be finished */
2094 	if (gaudi->multi_msi_mode) {
2095 		for (i = 0 ; i < cq_cnt ; i++)
2096 			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2097 
2098 		synchronize_irq(gaudi_pci_irq_vector(hdev,
2099 						GAUDI_EVENT_QUEUE_MSI_IDX,
2100 						true));
2101 	} else {
2102 		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2103 	}
2104 }
2105 
2106 static void gaudi_disable_msi(struct hl_device *hdev)
2107 {
2108 	struct gaudi_device *gaudi = hdev->asic_specific;
2109 	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2110 
2111 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2112 		return;
2113 
2114 	gaudi_sync_irqs(hdev);
2115 
2116 	if (gaudi->multi_msi_mode) {
2117 		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2118 						true);
2119 		free_irq(irq, &hdev->event_queue);
2120 
2121 		for (i = 0 ; i < cq_cnt ; i++) {
2122 			irq = gaudi_pci_irq_vector(hdev, i, false);
2123 			free_irq(irq, &hdev->completion_queue[i]);
2124 		}
2125 	} else {
2126 		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2127 	}
2128 
2129 	pci_free_irq_vectors(hdev->pdev);
2130 
2131 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2132 }
2133 
2134 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2135 {
2136 	struct gaudi_device *gaudi = hdev->asic_specific;
2137 
2138 	if (hdev->asic_prop.fw_security_enabled)
2139 		return;
2140 
2141 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2142 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2143 		return;
2144 
2145 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2146 		return;
2147 
2148 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2149 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2150 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2151 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2152 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2153 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2154 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2155 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2156 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2157 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2158 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2159 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2160 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2161 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2162 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2163 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2164 
2165 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2166 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2167 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2168 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2169 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2170 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2171 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2172 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2173 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2174 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2175 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2176 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2177 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2178 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2179 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2180 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2181 
2182 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2183 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2184 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2185 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2186 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2187 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2188 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2189 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2190 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2191 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2192 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2193 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2194 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2195 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2196 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2197 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2198 
2199 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2200 }
2201 
2202 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2203 {
2204 	struct gaudi_device *gaudi = hdev->asic_specific;
2205 
2206 	if (hdev->asic_prop.fw_security_enabled)
2207 		return;
2208 
2209 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2210 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2211 		return;
2212 
2213 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2214 		return;
2215 
2216 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2217 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2219 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2221 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2223 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2225 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2226 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2227 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2228 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2229 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2230 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2231 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2232 
2233 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2234 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2235 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2236 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2237 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2238 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2239 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2240 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2241 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2242 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2243 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2244 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2245 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2246 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2247 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2248 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2249 
2250 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2251 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2252 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2253 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2254 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2255 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2256 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2257 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2258 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2259 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2260 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2261 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2262 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2263 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2264 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2265 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2266 
2267 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2268 }
2269 
2270 static void gaudi_init_e2e(struct hl_device *hdev)
2271 {
2272 	if (hdev->asic_prop.fw_security_enabled)
2273 		return;
2274 
2275 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2276 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2277 		return;
2278 
2279 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2280 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2281 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2282 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2283 
2284 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2285 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2286 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2287 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2288 
2289 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2290 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2291 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2292 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2293 
2294 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2295 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2296 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2297 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2298 
2299 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2300 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2301 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2302 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2303 
2304 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2305 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2306 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2307 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2308 
2309 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2310 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2311 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2312 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2313 
2314 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2315 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2316 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2317 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2318 
2319 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2320 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2321 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2322 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2323 
2324 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2325 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2326 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2327 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2328 
2329 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2330 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2331 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2332 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2333 
2334 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2335 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2336 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2337 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2338 
2339 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2340 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2341 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2342 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2343 
2344 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2345 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2346 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2347 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2348 
2349 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2350 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2351 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2352 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2353 
2354 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2355 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2356 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2357 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2358 
2359 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2360 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2361 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2362 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2363 
2364 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2365 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2366 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2367 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2368 
2369 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2370 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2371 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2372 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2373 
2374 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2375 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2376 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2377 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2378 
2379 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2380 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2381 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2382 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2383 
2384 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2385 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2386 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2387 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2388 
2389 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2390 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2391 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2392 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2393 
2394 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2395 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2396 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2397 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2398 
2399 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2400 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2401 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2402 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2403 
2404 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2405 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2406 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2407 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2408 
2409 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2410 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2411 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2412 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2413 
2414 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2415 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2416 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2417 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2418 
2419 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2420 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2421 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2422 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2423 
2424 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2425 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2426 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2427 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2428 
2429 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2430 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2431 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2432 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2433 
2434 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2435 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2436 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2437 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2438 
2439 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2440 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2441 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2442 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2443 
2444 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2445 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2446 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2447 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2448 
2449 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2450 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2451 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2452 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2453 
2454 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2455 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2456 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2457 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2458 
2459 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2460 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2461 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2462 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2463 
2464 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2465 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2466 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2467 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2468 
2469 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2470 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2471 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2472 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2473 
2474 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2475 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2476 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2477 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2478 
2479 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2480 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2481 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2482 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2483 
2484 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2485 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2486 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2487 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2488 
2489 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2490 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2491 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2492 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2493 
2494 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2495 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2496 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2497 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2498 
2499 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2500 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2501 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2502 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2503 
2504 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2505 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2506 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2507 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2508 
2509 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2510 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2511 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2512 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2513 
2514 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2515 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2516 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2517 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2518 }
2519 
2520 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2521 {
2522 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2523 
2524 	if (hdev->asic_prop.fw_security_enabled)
2525 		return;
2526 
2527 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2528 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2529 		return;
2530 
2531 	hbm0_wr = 0x33333333;
2532 	hbm0_rd = 0x77777777;
2533 	hbm1_wr = 0x55555555;
2534 	hbm1_rd = 0xDDDDDDDD;
2535 
2536 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2537 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2538 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2539 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2540 
2541 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2542 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2543 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2544 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2545 
2546 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2547 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2548 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2549 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2550 
2551 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2552 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2553 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2554 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2555 
2556 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2557 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2558 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2559 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2560 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2561 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2562 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2563 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2564 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2565 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2566 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2567 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2568 
2569 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2570 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2571 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2572 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2573 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2574 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2575 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2576 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2577 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2578 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2579 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2580 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2581 }
2582 
2583 static void gaudi_init_golden_registers(struct hl_device *hdev)
2584 {
2585 	u32 tpc_offset;
2586 	int tpc_id, i;
2587 
2588 	gaudi_init_e2e(hdev);
2589 	gaudi_init_hbm_cred(hdev);
2590 
2591 	for (tpc_id = 0, tpc_offset = 0;
2592 				tpc_id < TPC_NUMBER_OF_ENGINES;
2593 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2594 		/* Mask all arithmetic interrupts from TPC */
2595 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2596 		/* Set 16 cache lines */
2597 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2598 				ICACHE_FETCH_LINE_NUM, 2);
2599 	}
2600 
2601 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2602 	for (i = 0 ; i < 128 ; i += 8)
2603 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2604 
2605 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2606 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2607 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2608 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2609 }
2610 
2611 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2612 					int qman_id, dma_addr_t qman_pq_addr)
2613 {
2614 	struct cpu_dyn_regs *dyn_regs =
2615 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2616 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2617 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2618 	u32 q_off, dma_qm_offset;
2619 	u32 dma_qm_err_cfg, irq_handler_offset;
2620 
2621 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2622 
2623 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2624 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2625 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2626 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2627 	so_base_en_lo = lower_32_bits(CFG_BASE +
2628 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2629 	so_base_en_hi = upper_32_bits(CFG_BASE +
2630 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2631 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2632 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2633 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2634 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2635 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2636 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2637 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2638 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2639 
2640 	q_off = dma_qm_offset + qman_id * 4;
2641 
2642 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2643 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2644 
2645 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2646 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2647 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2648 
2649 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2650 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2651 							QMAN_LDMA_SRC_OFFSET);
2652 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2653 							QMAN_LDMA_DST_OFFSET);
2654 
2655 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2656 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2657 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2658 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2659 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2660 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2661 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2662 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2663 
2664 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2665 
2666 	/* The following configuration is needed only once per QMAN */
2667 	if (qman_id == 0) {
2668 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2669 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2670 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2671 
2672 		/* Configure RAZWI IRQ */
2673 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2674 		if (hdev->stop_on_err)
2675 			dma_qm_err_cfg |=
2676 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2677 
2678 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2679 
2680 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2681 			lower_32_bits(CFG_BASE + irq_handler_offset));
2682 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2683 			upper_32_bits(CFG_BASE + irq_handler_offset));
2684 
2685 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2686 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2687 									dma_id);
2688 
2689 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2690 				QM_ARB_ERR_MSG_EN_MASK);
2691 
2692 		/* Set timeout to maximum */
2693 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2694 
2695 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2696 				QMAN_EXTERNAL_MAKE_TRUSTED);
2697 
2698 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2699 	}
2700 }
2701 
2702 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2703 {
2704 	struct cpu_dyn_regs *dyn_regs =
2705 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2706 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2707 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2708 	u32 irq_handler_offset;
2709 
2710 	/* Set to maximum possible according to physical size */
2711 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2712 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2713 
2714 	/* WA for H/W bug H3-2116 */
2715 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2716 
2717 	/* STOP_ON bit implies no completion to operation in case of RAZWI */
2718 	if (hdev->stop_on_err)
2719 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2720 
2721 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2722 
2723 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2724 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2725 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2726 
2727 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2728 		lower_32_bits(CFG_BASE + irq_handler_offset));
2729 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2730 		upper_32_bits(CFG_BASE + irq_handler_offset));
2731 
2732 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2733 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2734 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2735 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2736 	/* If the channel is secured, it should be in MMU bypass mode */
2737 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2738 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2739 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2740 }
2741 
2742 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2743 				u32 enable_mask)
2744 {
2745 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2746 
2747 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2748 }
2749 
2750 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2751 {
2752 	struct gaudi_device *gaudi = hdev->asic_specific;
2753 	struct hl_hw_queue *q;
2754 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2755 
2756 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2757 		return;
2758 
2759 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2760 		dma_id = gaudi_dma_assignment[i];
2761 		/*
2762 		 * For queues after the CPU Q need to add 1 to get the correct
2763 		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
2764 		 * order to get the correct MSI register.
2765 		 */
2766 		if (dma_id > 1) {
2767 			cpu_skip = 1;
2768 			nic_skip = NIC_NUMBER_OF_ENGINES;
2769 		} else {
2770 			cpu_skip = 0;
2771 			nic_skip = 0;
2772 		}
2773 
2774 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2775 			q_idx = 4 * dma_id + j + cpu_skip;
2776 			q = &hdev->kernel_queues[q_idx];
2777 			q->cq_id = cq_id++;
2778 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2779 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2780 						q->bus_address);
2781 		}
2782 
2783 		gaudi_init_dma_core(hdev, dma_id);
2784 
2785 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2786 	}
2787 
2788 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2789 }
2790 
2791 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2792 					int qman_id, u64 qman_base_addr)
2793 {
2794 	struct cpu_dyn_regs *dyn_regs =
2795 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2796 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2797 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2798 	u32 dma_qm_err_cfg, irq_handler_offset;
2799 	u32 q_off, dma_qm_offset;
2800 
2801 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2802 
2803 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2804 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2805 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2806 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2807 	so_base_en_lo = lower_32_bits(CFG_BASE +
2808 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2809 	so_base_en_hi = upper_32_bits(CFG_BASE +
2810 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2811 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2812 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2813 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2814 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2815 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2816 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2817 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2818 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2819 
2820 	q_off = dma_qm_offset + qman_id * 4;
2821 
2822 	if (qman_id < 4) {
2823 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2824 					lower_32_bits(qman_base_addr));
2825 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2826 					upper_32_bits(qman_base_addr));
2827 
2828 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2829 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2830 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2831 
2832 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2833 							QMAN_CPDMA_SIZE_OFFSET);
2834 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2835 							QMAN_CPDMA_SRC_OFFSET);
2836 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2837 							QMAN_CPDMA_DST_OFFSET);
2838 	} else {
2839 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2840 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2841 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2842 
2843 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2844 							QMAN_LDMA_SIZE_OFFSET);
2845 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2846 							QMAN_LDMA_SRC_OFFSET);
2847 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2848 							QMAN_LDMA_DST_OFFSET);
2849 
2850 		/* Configure RAZWI IRQ */
2851 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2852 		if (hdev->stop_on_err)
2853 			dma_qm_err_cfg |=
2854 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2855 
2856 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2857 
2858 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2859 			lower_32_bits(CFG_BASE + irq_handler_offset));
2860 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2861 			upper_32_bits(CFG_BASE + irq_handler_offset));
2862 
2863 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2864 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2865 									dma_id);
2866 
2867 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2868 				QM_ARB_ERR_MSG_EN_MASK);
2869 
2870 		/* Set timeout to maximum */
2871 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2872 
2873 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2874 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2875 				QMAN_INTERNAL_MAKE_TRUSTED);
2876 	}
2877 
2878 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2879 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2880 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2881 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2882 
2883 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2884 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2885 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2886 				mtr_base_ws_lo);
2887 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2888 				mtr_base_ws_hi);
2889 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2890 				so_base_ws_lo);
2891 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2892 				so_base_ws_hi);
2893 	}
2894 }
2895 
2896 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2897 {
2898 	struct gaudi_device *gaudi = hdev->asic_specific;
2899 	struct gaudi_internal_qman_info *q;
2900 	u64 qman_base_addr;
2901 	int i, j, dma_id, internal_q_index;
2902 
2903 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2904 		return;
2905 
2906 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2907 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2908 
2909 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2910 			 /*
2911 			  * Add the CPU queue in order to get the correct queue
2912 			  * number as all internal queue are placed after it
2913 			  */
2914 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2915 
2916 			q = &gaudi->internal_qmans[internal_q_index];
2917 			qman_base_addr = (u64) q->pq_dma_addr;
2918 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2919 						qman_base_addr);
2920 		}
2921 
2922 		/* Initializing lower CP for HBM DMA QMAN */
2923 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2924 
2925 		gaudi_init_dma_core(hdev, dma_id);
2926 
2927 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2928 	}
2929 
2930 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2931 }
2932 
2933 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2934 					int qman_id, u64 qman_base_addr)
2935 {
2936 	struct cpu_dyn_regs *dyn_regs =
2937 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2938 	u32 mtr_base_lo, mtr_base_hi;
2939 	u32 so_base_lo, so_base_hi;
2940 	u32 irq_handler_offset;
2941 	u32 q_off, mme_id;
2942 	u32 mme_qm_err_cfg;
2943 
2944 	mtr_base_lo = lower_32_bits(CFG_BASE +
2945 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2946 	mtr_base_hi = upper_32_bits(CFG_BASE +
2947 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2948 	so_base_lo = lower_32_bits(CFG_BASE +
2949 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2950 	so_base_hi = upper_32_bits(CFG_BASE +
2951 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2952 
2953 	q_off = mme_offset + qman_id * 4;
2954 
2955 	if (qman_id < 4) {
2956 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2957 					lower_32_bits(qman_base_addr));
2958 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2959 					upper_32_bits(qman_base_addr));
2960 
2961 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2962 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2963 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2964 
2965 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2966 							QMAN_CPDMA_SIZE_OFFSET);
2967 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2968 							QMAN_CPDMA_SRC_OFFSET);
2969 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2970 							QMAN_CPDMA_DST_OFFSET);
2971 	} else {
2972 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2973 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2974 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2975 
2976 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2977 							QMAN_LDMA_SIZE_OFFSET);
2978 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2979 							QMAN_LDMA_SRC_OFFSET);
2980 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2981 							QMAN_LDMA_DST_OFFSET);
2982 
2983 		/* Configure RAZWI IRQ */
2984 		mme_id = mme_offset /
2985 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2986 
2987 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2988 		if (hdev->stop_on_err)
2989 			mme_qm_err_cfg |=
2990 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2991 
2992 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2993 
2994 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2995 			lower_32_bits(CFG_BASE + irq_handler_offset));
2996 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2997 			upper_32_bits(CFG_BASE + irq_handler_offset));
2998 
2999 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3000 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3001 									mme_id);
3002 
3003 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3004 				QM_ARB_ERR_MSG_EN_MASK);
3005 
3006 		/* Set timeout to maximum */
3007 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
3008 
3009 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3010 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3011 				QMAN_INTERNAL_MAKE_TRUSTED);
3012 	}
3013 
3014 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3015 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3016 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3017 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3018 }
3019 
3020 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3021 {
3022 	struct gaudi_device *gaudi = hdev->asic_specific;
3023 	struct gaudi_internal_qman_info *q;
3024 	u64 qman_base_addr;
3025 	u32 mme_offset;
3026 	int i, internal_q_index;
3027 
3028 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
3029 		return;
3030 
3031 	/*
3032 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3033 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3034 	 */
3035 
3036 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3037 
3038 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3039 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3040 		q = &gaudi->internal_qmans[internal_q_index];
3041 		qman_base_addr = (u64) q->pq_dma_addr;
3042 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3043 					qman_base_addr);
3044 		if (i == 3)
3045 			mme_offset = 0;
3046 	}
3047 
3048 	/* Initializing lower CP for MME QMANs */
3049 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3050 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3051 	gaudi_init_mme_qman(hdev, 0, 4, 0);
3052 
3053 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3054 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3055 
3056 	gaudi->hw_cap_initialized |= HW_CAP_MME;
3057 }
3058 
3059 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3060 				int qman_id, u64 qman_base_addr)
3061 {
3062 	struct cpu_dyn_regs *dyn_regs =
3063 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3064 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3065 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3066 	u32 tpc_qm_err_cfg, irq_handler_offset;
3067 	u32 q_off, tpc_id;
3068 
3069 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3070 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3071 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3072 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3073 	so_base_en_lo = lower_32_bits(CFG_BASE +
3074 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3075 	so_base_en_hi = upper_32_bits(CFG_BASE +
3076 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3077 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3078 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3079 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3080 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3081 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3082 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3083 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3084 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3085 
3086 	q_off = tpc_offset + qman_id * 4;
3087 
3088 	tpc_id = tpc_offset /
3089 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3090 
3091 	if (qman_id < 4) {
3092 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3093 					lower_32_bits(qman_base_addr));
3094 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3095 					upper_32_bits(qman_base_addr));
3096 
3097 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3098 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3099 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3100 
3101 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3102 							QMAN_CPDMA_SIZE_OFFSET);
3103 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3104 							QMAN_CPDMA_SRC_OFFSET);
3105 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3106 							QMAN_CPDMA_DST_OFFSET);
3107 	} else {
3108 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3109 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3110 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3111 
3112 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3113 							QMAN_LDMA_SIZE_OFFSET);
3114 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3115 							QMAN_LDMA_SRC_OFFSET);
3116 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3117 							QMAN_LDMA_DST_OFFSET);
3118 
3119 		/* Configure RAZWI IRQ */
3120 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3121 		if (hdev->stop_on_err)
3122 			tpc_qm_err_cfg |=
3123 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3124 
3125 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3126 
3127 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3128 			lower_32_bits(CFG_BASE + irq_handler_offset));
3129 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3130 			upper_32_bits(CFG_BASE + irq_handler_offset));
3131 
3132 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3133 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3134 									tpc_id);
3135 
3136 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3137 				QM_ARB_ERR_MSG_EN_MASK);
3138 
3139 		/* Set timeout to maximum */
3140 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3141 
3142 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3143 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3144 				QMAN_INTERNAL_MAKE_TRUSTED);
3145 	}
3146 
3147 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3148 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3149 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3150 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3151 
3152 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3153 	if (tpc_id == 6) {
3154 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3155 				mtr_base_ws_lo);
3156 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3157 				mtr_base_ws_hi);
3158 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3159 				so_base_ws_lo);
3160 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3161 				so_base_ws_hi);
3162 	}
3163 }
3164 
3165 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3166 {
3167 	struct gaudi_device *gaudi = hdev->asic_specific;
3168 	struct gaudi_internal_qman_info *q;
3169 	u64 qman_base_addr;
3170 	u32 so_base_hi, tpc_offset = 0;
3171 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3172 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3173 	int i, tpc_id, internal_q_index;
3174 
3175 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3176 		return;
3177 
3178 	so_base_hi = upper_32_bits(CFG_BASE +
3179 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3180 
3181 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3182 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3183 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3184 						tpc_id * QMAN_STREAMS + i;
3185 			q = &gaudi->internal_qmans[internal_q_index];
3186 			qman_base_addr = (u64) q->pq_dma_addr;
3187 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3188 						qman_base_addr);
3189 
3190 			if (i == 3) {
3191 				/* Initializing lower CP for TPC QMAN */
3192 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3193 
3194 				/* Enable the QMAN and TPC channel */
3195 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3196 						QMAN_TPC_ENABLE);
3197 			}
3198 		}
3199 
3200 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3201 				so_base_hi);
3202 
3203 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3204 
3205 		gaudi->hw_cap_initialized |=
3206 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3207 	}
3208 }
3209 
3210 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3211 				int qman_id, u64 qman_base_addr, int nic_id)
3212 {
3213 	struct cpu_dyn_regs *dyn_regs =
3214 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3215 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3216 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3217 	u32 nic_qm_err_cfg, irq_handler_offset;
3218 	u32 q_off;
3219 
3220 	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3221 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3222 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3223 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3224 	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3225 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3226 	so_base_en_hi = upper_32_bits(CFG_BASE +
3227 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3228 	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3229 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3230 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3231 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3232 	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3233 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3234 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3235 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3236 
3237 	q_off = nic_offset + qman_id * 4;
3238 
3239 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3240 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3241 
3242 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3243 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3244 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3245 
3246 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3247 							QMAN_LDMA_SIZE_OFFSET);
3248 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3249 							QMAN_LDMA_SRC_OFFSET);
3250 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3251 							QMAN_LDMA_DST_OFFSET);
3252 
3253 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3254 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3255 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3256 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3257 
3258 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3259 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3260 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3261 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3262 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3263 
3264 	if (qman_id == 0) {
3265 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3266 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3267 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3268 
3269 		/* Configure RAZWI IRQ */
3270 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3271 		if (hdev->stop_on_err)
3272 			nic_qm_err_cfg |=
3273 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3274 
3275 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3276 
3277 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3278 			lower_32_bits(CFG_BASE + irq_handler_offset));
3279 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3280 			upper_32_bits(CFG_BASE + irq_handler_offset));
3281 
3282 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3283 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3284 									nic_id);
3285 
3286 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3287 				QM_ARB_ERR_MSG_EN_MASK);
3288 
3289 		/* Set timeout to maximum */
3290 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3291 
3292 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3293 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3294 				QMAN_INTERNAL_MAKE_TRUSTED);
3295 	}
3296 }
3297 
3298 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3299 {
3300 	struct gaudi_device *gaudi = hdev->asic_specific;
3301 	struct gaudi_internal_qman_info *q;
3302 	u64 qman_base_addr;
3303 	u32 nic_offset = 0;
3304 	u32 nic_delta_between_qmans =
3305 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3306 	u32 nic_delta_between_nics =
3307 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3308 	int i, nic_id, internal_q_index;
3309 
3310 	if (!hdev->nic_ports_mask)
3311 		return;
3312 
3313 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3314 		return;
3315 
3316 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3317 
3318 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3319 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3320 			nic_offset += nic_delta_between_qmans;
3321 			if (nic_id & 1) {
3322 				nic_offset -= (nic_delta_between_qmans * 2);
3323 				nic_offset += nic_delta_between_nics;
3324 			}
3325 			continue;
3326 		}
3327 
3328 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3329 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3330 						nic_id * QMAN_STREAMS + i;
3331 			q = &gaudi->internal_qmans[internal_q_index];
3332 			qman_base_addr = (u64) q->pq_dma_addr;
3333 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3334 						qman_base_addr, nic_id);
3335 		}
3336 
3337 		/* Enable the QMAN */
3338 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3339 
3340 		nic_offset += nic_delta_between_qmans;
3341 		if (nic_id & 1) {
3342 			nic_offset -= (nic_delta_between_qmans * 2);
3343 			nic_offset += nic_delta_between_nics;
3344 		}
3345 
3346 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3347 	}
3348 }
3349 
3350 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3351 {
3352 	struct gaudi_device *gaudi = hdev->asic_specific;
3353 
3354 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3355 		return;
3356 
3357 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3358 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3359 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3360 }
3361 
3362 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3363 {
3364 	struct gaudi_device *gaudi = hdev->asic_specific;
3365 
3366 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3367 		return;
3368 
3369 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3370 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3371 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3372 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3373 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3374 }
3375 
3376 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3377 {
3378 	struct gaudi_device *gaudi = hdev->asic_specific;
3379 
3380 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3381 		return;
3382 
3383 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3384 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3385 }
3386 
3387 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3388 {
3389 	struct gaudi_device *gaudi = hdev->asic_specific;
3390 	u32 tpc_offset = 0;
3391 	int tpc_id;
3392 
3393 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3394 		return;
3395 
3396 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3397 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3398 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3399 	}
3400 }
3401 
3402 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3403 {
3404 	struct gaudi_device *gaudi = hdev->asic_specific;
3405 	u32 nic_mask, nic_offset = 0;
3406 	u32 nic_delta_between_qmans =
3407 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3408 	u32 nic_delta_between_nics =
3409 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3410 	int nic_id;
3411 
3412 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3413 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3414 
3415 		if (gaudi->hw_cap_initialized & nic_mask)
3416 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3417 
3418 		nic_offset += nic_delta_between_qmans;
3419 		if (nic_id & 1) {
3420 			nic_offset -= (nic_delta_between_qmans * 2);
3421 			nic_offset += nic_delta_between_nics;
3422 		}
3423 	}
3424 }
3425 
3426 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3427 {
3428 	struct gaudi_device *gaudi = hdev->asic_specific;
3429 
3430 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3431 		return;
3432 
3433 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3434 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3435 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3436 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3437 }
3438 
3439 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3440 {
3441 	struct gaudi_device *gaudi = hdev->asic_specific;
3442 
3443 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3444 		return;
3445 
3446 	/* Stop CPs of HBM DMA QMANs */
3447 
3448 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3449 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3450 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3451 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3452 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3453 }
3454 
3455 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3456 {
3457 	struct gaudi_device *gaudi = hdev->asic_specific;
3458 
3459 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3460 		return;
3461 
3462 	/* Stop CPs of MME QMANs */
3463 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3464 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3465 }
3466 
3467 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3468 {
3469 	struct gaudi_device *gaudi = hdev->asic_specific;
3470 
3471 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3472 		return;
3473 
3474 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3475 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3476 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3477 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3478 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3479 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3480 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3481 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3482 }
3483 
3484 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3485 {
3486 	struct gaudi_device *gaudi = hdev->asic_specific;
3487 
3488 	/* Stop upper CPs of QMANs */
3489 
3490 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3491 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3492 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3493 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3494 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3495 
3496 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3497 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3498 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3499 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3500 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3501 
3502 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3503 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3504 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3505 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3506 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3507 
3508 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3509 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3510 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3511 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3512 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3513 
3514 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3515 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3516 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3517 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3518 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3519 
3520 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3521 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3522 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3523 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3524 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3525 
3526 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3527 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3528 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3529 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3530 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3531 
3532 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3533 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3534 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3535 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3536 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3537 
3538 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3539 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3540 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3541 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3542 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3543 
3544 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3545 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3546 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3547 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3548 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3549 }
3550 
3551 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3552 {
3553 	struct gaudi_device *gaudi = hdev->asic_specific;
3554 
3555 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3556 		return;
3557 
3558 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3559 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3560 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3561 }
3562 
3563 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3564 {
3565 	struct gaudi_device *gaudi = hdev->asic_specific;
3566 
3567 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3568 		return;
3569 
3570 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3571 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3572 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3573 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3574 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3575 }
3576 
3577 static void gaudi_mme_stall(struct hl_device *hdev)
3578 {
3579 	struct gaudi_device *gaudi = hdev->asic_specific;
3580 
3581 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3582 		return;
3583 
3584 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3585 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3586 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3587 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3588 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3589 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3590 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3591 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3592 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3593 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3594 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3595 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3596 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3597 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3598 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3599 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3600 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3601 }
3602 
3603 static void gaudi_tpc_stall(struct hl_device *hdev)
3604 {
3605 	struct gaudi_device *gaudi = hdev->asic_specific;
3606 
3607 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3608 		return;
3609 
3610 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3611 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3612 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3613 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3614 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3615 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3616 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3617 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3618 }
3619 
3620 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3621 {
3622 	u32 qman_offset;
3623 	int i;
3624 
3625 	if (hdev->asic_prop.fw_security_enabled)
3626 		return;
3627 
3628 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3629 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3630 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3631 
3632 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3633 	}
3634 
3635 	WREG32(mmMME0_QM_CGM_CFG, 0);
3636 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3637 	WREG32(mmMME2_QM_CGM_CFG, 0);
3638 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3639 
3640 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3641 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3642 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3643 
3644 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3645 	}
3646 }
3647 
3648 static void gaudi_enable_timestamp(struct hl_device *hdev)
3649 {
3650 	/* Disable the timestamp counter */
3651 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3652 
3653 	/* Zero the lower/upper parts of the 64-bit counter */
3654 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3655 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3656 
3657 	/* Enable the counter */
3658 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3659 }
3660 
3661 static void gaudi_disable_timestamp(struct hl_device *hdev)
3662 {
3663 	/* Disable the timestamp counter */
3664 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3665 }
3666 
3667 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3668 {
3669 	u32 wait_timeout_ms;
3670 
3671 	if (hdev->pldm)
3672 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3673 	else
3674 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3675 
3676 	if (fw_reset)
3677 		goto skip_engines;
3678 
3679 	gaudi_stop_nic_qmans(hdev);
3680 	gaudi_stop_mme_qmans(hdev);
3681 	gaudi_stop_tpc_qmans(hdev);
3682 	gaudi_stop_hbm_dma_qmans(hdev);
3683 	gaudi_stop_pci_dma_qmans(hdev);
3684 
3685 	msleep(wait_timeout_ms);
3686 
3687 	gaudi_pci_dma_stall(hdev);
3688 	gaudi_hbm_dma_stall(hdev);
3689 	gaudi_tpc_stall(hdev);
3690 	gaudi_mme_stall(hdev);
3691 
3692 	msleep(wait_timeout_ms);
3693 
3694 	gaudi_disable_nic_qmans(hdev);
3695 	gaudi_disable_mme_qmans(hdev);
3696 	gaudi_disable_tpc_qmans(hdev);
3697 	gaudi_disable_hbm_dma_qmans(hdev);
3698 	gaudi_disable_pci_dma_qmans(hdev);
3699 
3700 	gaudi_disable_timestamp(hdev);
3701 
3702 skip_engines:
3703 	gaudi_disable_msi(hdev);
3704 }
3705 
3706 static int gaudi_mmu_init(struct hl_device *hdev)
3707 {
3708 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3709 	struct gaudi_device *gaudi = hdev->asic_specific;
3710 	u64 hop0_addr;
3711 	int rc, i;
3712 
3713 	if (!hdev->mmu_enable)
3714 		return 0;
3715 
3716 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3717 		return 0;
3718 
3719 	for (i = 0 ; i < prop->max_asid ; i++) {
3720 		hop0_addr = prop->mmu_pgt_addr +
3721 				(i * prop->mmu_hop_table_size);
3722 
3723 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3724 		if (rc) {
3725 			dev_err(hdev->dev,
3726 				"failed to set hop0 addr for asid %d\n", i);
3727 			goto err;
3728 		}
3729 	}
3730 
3731 	/* init MMU cache manage page */
3732 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3733 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3734 
3735 	/* mem cache invalidation */
3736 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3737 
3738 	hl_mmu_invalidate_cache(hdev, true, 0);
3739 
3740 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3741 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3742 
3743 	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3744 
3745 	/*
3746 	 * The H/W expects the first PI after init to be 1. After wraparound
3747 	 * we'll write 0.
3748 	 */
3749 	gaudi->mmu_cache_inv_pi = 1;
3750 
3751 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3752 
3753 	return 0;
3754 
3755 err:
3756 	return rc;
3757 }
3758 
3759 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3760 {
3761 	void __iomem *dst;
3762 
3763 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3764 
3765 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3766 }
3767 
3768 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3769 {
3770 	void __iomem *dst;
3771 
3772 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3773 
3774 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3775 }
3776 
3777 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3778 {
3779 	struct dynamic_fw_load_mgr *dynamic_loader;
3780 	struct cpu_dyn_regs *dyn_regs;
3781 
3782 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3783 
3784 	/*
3785 	 * here we update initial values for few specific dynamic regs (as
3786 	 * before reading the first descriptor from FW those value has to be
3787 	 * hard-coded) in later stages of the protocol those values will be
3788 	 * updated automatically by reading the FW descriptor so data there
3789 	 * will always be up-to-date
3790 	 */
3791 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3792 	dyn_regs->kmd_msg_to_cpu =
3793 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3794 	dyn_regs->cpu_cmd_status_to_host =
3795 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3796 
3797 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3798 }
3799 
3800 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3801 {
3802 	struct static_fw_load_mgr *static_loader;
3803 
3804 	static_loader = &hdev->fw_loader.static_loader;
3805 
3806 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3807 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3808 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3809 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3810 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3811 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3812 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3813 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3814 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3815 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3816 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3817 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3818 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3819 			GAUDI_PLDM_RESET_WAIT_MSEC :
3820 			GAUDI_CPU_RESET_WAIT_MSEC;
3821 }
3822 
3823 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3824 {
3825 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3826 
3827 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3828 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3829 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3830 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3831 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3832 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3833 }
3834 
3835 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3836 {
3837 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3838 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3839 
3840 	/* fill common fields */
3841 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3842 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3843 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3844 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3845 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3846 	fw_loader->skip_bmc = !hdev->bmc_enable;
3847 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3848 	fw_loader->dram_bar_id = HBM_BAR_ID;
3849 
3850 	if (prop->dynamic_fw_load)
3851 		gaudi_init_dynamic_firmware_loader(hdev);
3852 	else
3853 		gaudi_init_static_firmware_loader(hdev);
3854 }
3855 
3856 static int gaudi_init_cpu(struct hl_device *hdev)
3857 {
3858 	struct gaudi_device *gaudi = hdev->asic_specific;
3859 	int rc;
3860 
3861 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3862 		return 0;
3863 
3864 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3865 		return 0;
3866 
3867 	/*
3868 	 * The device CPU works with 40 bits addresses.
3869 	 * This register sets the extension to 50 bits.
3870 	 */
3871 	if (!hdev->asic_prop.fw_security_enabled)
3872 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3873 
3874 	rc = hl_fw_init_cpu(hdev);
3875 
3876 	if (rc)
3877 		return rc;
3878 
3879 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3880 
3881 	return 0;
3882 }
3883 
3884 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3885 {
3886 	struct cpu_dyn_regs *dyn_regs =
3887 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3888 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3889 	struct gaudi_device *gaudi = hdev->asic_specific;
3890 	u32 status, irq_handler_offset;
3891 	struct hl_eq *eq;
3892 	struct hl_hw_queue *cpu_pq =
3893 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3894 	int err;
3895 
3896 	if (!hdev->cpu_queues_enable)
3897 		return 0;
3898 
3899 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3900 		return 0;
3901 
3902 	eq = &hdev->event_queue;
3903 
3904 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3905 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3906 
3907 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3908 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3909 
3910 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3911 			lower_32_bits(hdev->cpu_accessible_dma_address));
3912 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3913 			upper_32_bits(hdev->cpu_accessible_dma_address));
3914 
3915 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3916 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3917 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3918 
3919 	/* Used for EQ CI */
3920 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3921 
3922 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3923 
3924 	if (gaudi->multi_msi_mode)
3925 		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3926 	else
3927 		WREG32(mmCPU_IF_QUEUE_INIT,
3928 			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3929 
3930 	irq_handler_offset = prop->gic_interrupts_enable ?
3931 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3932 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3933 
3934 	WREG32(irq_handler_offset,
3935 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3936 
3937 	err = hl_poll_timeout(
3938 		hdev,
3939 		mmCPU_IF_QUEUE_INIT,
3940 		status,
3941 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3942 		1000,
3943 		cpu_timeout);
3944 
3945 	if (err) {
3946 		dev_err(hdev->dev,
3947 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3948 		return -EIO;
3949 	}
3950 
3951 	/* update FW application security bits */
3952 	if (prop->fw_cpu_boot_dev_sts0_valid)
3953 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3954 	if (prop->fw_cpu_boot_dev_sts1_valid)
3955 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3956 
3957 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3958 	return 0;
3959 }
3960 
3961 static void gaudi_pre_hw_init(struct hl_device *hdev)
3962 {
3963 	/* Perform read from the device to make sure device is up */
3964 	RREG32(mmHW_STATE);
3965 
3966 	if (!hdev->asic_prop.fw_security_enabled) {
3967 		/* Set the access through PCI bars (Linux driver only) as
3968 		 * secured
3969 		 */
3970 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3971 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3972 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3973 
3974 		/* Perform read to flush the waiting writes to ensure
3975 		 * configuration was set in the device
3976 		 */
3977 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3978 	}
3979 
3980 	/*
3981 	 * Let's mark in the H/W that we have reached this point. We check
3982 	 * this value in the reset_before_init function to understand whether
3983 	 * we need to reset the chip before doing H/W init. This register is
3984 	 * cleared by the H/W upon H/W reset
3985 	 */
3986 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3987 }
3988 
3989 static int gaudi_hw_init(struct hl_device *hdev)
3990 {
3991 	struct gaudi_device *gaudi = hdev->asic_specific;
3992 	int rc;
3993 
3994 	gaudi_pre_hw_init(hdev);
3995 
3996 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3997 	 * So we set it here and if anyone tries to move it later to
3998 	 * a different address, there will be an error
3999 	 */
4000 	if (hdev->asic_prop.iatu_done_by_fw)
4001 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4002 
4003 	/*
4004 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
4005 	 * base address of dram
4006 	 */
4007 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4008 		dev_err(hdev->dev,
4009 			"failed to map HBM bar to DRAM base address\n");
4010 		return -EIO;
4011 	}
4012 
4013 	rc = gaudi_init_cpu(hdev);
4014 	if (rc) {
4015 		dev_err(hdev->dev, "failed to initialize CPU\n");
4016 		return rc;
4017 	}
4018 
4019 	/* In case the clock gating was enabled in preboot we need to disable
4020 	 * it here before touching the MME/TPC registers.
4021 	 */
4022 	gaudi_disable_clock_gating(hdev);
4023 
4024 	/* SRAM scrambler must be initialized after CPU is running from HBM */
4025 	gaudi_init_scrambler_sram(hdev);
4026 
4027 	/* This is here just in case we are working without CPU */
4028 	gaudi_init_scrambler_hbm(hdev);
4029 
4030 	gaudi_init_golden_registers(hdev);
4031 
4032 	rc = gaudi_mmu_init(hdev);
4033 	if (rc)
4034 		return rc;
4035 
4036 	gaudi_init_security(hdev);
4037 
4038 	gaudi_init_pci_dma_qmans(hdev);
4039 
4040 	gaudi_init_hbm_dma_qmans(hdev);
4041 
4042 	gaudi_init_mme_qmans(hdev);
4043 
4044 	gaudi_init_tpc_qmans(hdev);
4045 
4046 	gaudi_init_nic_qmans(hdev);
4047 
4048 	gaudi_enable_timestamp(hdev);
4049 
4050 	/* MSI must be enabled before CPU queues and NIC are initialized */
4051 	rc = gaudi_enable_msi(hdev);
4052 	if (rc)
4053 		goto disable_queues;
4054 
4055 	/* must be called after MSI was enabled */
4056 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4057 	if (rc) {
4058 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4059 			rc);
4060 		goto disable_msi;
4061 	}
4062 
4063 	/* Perform read from the device to flush all configuration */
4064 	RREG32(mmHW_STATE);
4065 
4066 	return 0;
4067 
4068 disable_msi:
4069 	gaudi_disable_msi(hdev);
4070 disable_queues:
4071 	gaudi_disable_mme_qmans(hdev);
4072 	gaudi_disable_pci_dma_qmans(hdev);
4073 
4074 	return rc;
4075 }
4076 
4077 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4078 {
4079 	struct cpu_dyn_regs *dyn_regs =
4080 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4081 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4082 	struct gaudi_device *gaudi = hdev->asic_specific;
4083 	bool driver_performs_reset;
4084 
4085 	if (!hard_reset) {
4086 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4087 		return 0;
4088 	}
4089 
4090 	if (hdev->pldm) {
4091 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4092 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4093 	} else {
4094 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4095 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4096 	}
4097 
4098 	if (fw_reset) {
4099 		dev_dbg(hdev->dev,
4100 			"Firmware performs HARD reset, going to wait %dms\n",
4101 			reset_timeout_ms);
4102 
4103 		goto skip_reset;
4104 	}
4105 
4106 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4107 					!hdev->asic_prop.hard_reset_done_by_fw);
4108 
4109 	/* Set device to handle FLR by H/W as we will put the device CPU to
4110 	 * halt mode
4111 	 */
4112 	if (driver_performs_reset)
4113 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4114 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4115 
4116 	/* If linux is loaded in the device CPU we need to communicate with it
4117 	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4118 	 * registers in case of old F/Ws
4119 	 */
4120 	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4121 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4122 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4123 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4124 
4125 		WREG32(irq_handler_offset,
4126 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4127 
4128 		/* This is a hail-mary attempt to revive the card in the small chance that the
4129 		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4130 		 * In that case, triggering reset through GIC won't help. We need to trigger the
4131 		 * reset as if Linux wasn't loaded.
4132 		 *
4133 		 * We do it only if the reset cause was HB, because that would be the indication
4134 		 * of such an event.
4135 		 *
4136 		 * In case watchdog hasn't expired but we still got HB, then this won't do any
4137 		 * damage.
4138 		 */
4139 		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4140 			if (hdev->asic_prop.hard_reset_done_by_fw)
4141 				hl_fw_ask_hard_reset_without_linux(hdev);
4142 			else
4143 				hl_fw_ask_halt_machine_without_linux(hdev);
4144 		}
4145 	} else {
4146 		if (hdev->asic_prop.hard_reset_done_by_fw)
4147 			hl_fw_ask_hard_reset_without_linux(hdev);
4148 		else
4149 			hl_fw_ask_halt_machine_without_linux(hdev);
4150 	}
4151 
4152 	if (driver_performs_reset) {
4153 
4154 		/* Configure the reset registers. Must be done as early as
4155 		 * possible in case we fail during H/W initialization
4156 		 */
4157 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4158 						(CFG_RST_H_DMA_MASK |
4159 						CFG_RST_H_MME_MASK |
4160 						CFG_RST_H_SM_MASK |
4161 						CFG_RST_H_TPC_7_MASK));
4162 
4163 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4164 
4165 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4166 						(CFG_RST_H_HBM_MASK |
4167 						CFG_RST_H_TPC_7_MASK |
4168 						CFG_RST_H_NIC_MASK |
4169 						CFG_RST_H_SM_MASK |
4170 						CFG_RST_H_DMA_MASK |
4171 						CFG_RST_H_MME_MASK |
4172 						CFG_RST_H_CPU_MASK |
4173 						CFG_RST_H_MMU_MASK));
4174 
4175 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4176 						(CFG_RST_L_IF_MASK |
4177 						CFG_RST_L_PSOC_MASK |
4178 						CFG_RST_L_TPC_MASK));
4179 
4180 		msleep(cpu_timeout_ms);
4181 
4182 		/* Tell ASIC not to re-initialize PCIe */
4183 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4184 
4185 		/* Restart BTL/BLR upon hard-reset */
4186 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4187 
4188 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4189 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4190 
4191 		dev_dbg(hdev->dev,
4192 			"Issued HARD reset command, going to wait %dms\n",
4193 			reset_timeout_ms);
4194 	} else {
4195 		dev_dbg(hdev->dev,
4196 			"Firmware performs HARD reset, going to wait %dms\n",
4197 			reset_timeout_ms);
4198 	}
4199 
4200 skip_reset:
4201 	/*
4202 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4203 	 * itself is in reset. Need to wait until the reset is deasserted
4204 	 */
4205 	msleep(reset_timeout_ms);
4206 
4207 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4208 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4209 		dev_err(hdev->dev,
4210 			"Timeout while waiting for device to reset 0x%x\n",
4211 			status);
4212 
4213 	if (gaudi) {
4214 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4215 						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4216 						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4217 						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4218 						HW_CAP_HBM_SCRAMBLER);
4219 
4220 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4221 
4222 		hdev->device_cpu_is_halted = false;
4223 	}
4224 	return 0;
4225 }
4226 
4227 static int gaudi_suspend(struct hl_device *hdev)
4228 {
4229 	int rc;
4230 
4231 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4232 	if (rc)
4233 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4234 
4235 	return rc;
4236 }
4237 
4238 static int gaudi_resume(struct hl_device *hdev)
4239 {
4240 	return gaudi_init_iatu(hdev);
4241 }
4242 
4243 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4244 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4245 {
4246 	int rc;
4247 
4248 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4249 			VM_DONTCOPY | VM_NORESERVE);
4250 
4251 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4252 				(dma_addr - HOST_PHYS_BASE), size);
4253 	if (rc)
4254 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4255 
4256 	return rc;
4257 }
4258 
4259 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4260 {
4261 	struct cpu_dyn_regs *dyn_regs =
4262 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4263 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4264 	struct gaudi_device *gaudi = hdev->asic_specific;
4265 	bool invalid_queue = false;
4266 	int dma_id;
4267 
4268 	switch (hw_queue_id) {
4269 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4270 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4271 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4272 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4273 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4274 		break;
4275 
4276 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4277 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4278 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4279 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4280 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4281 		break;
4282 
4283 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4284 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4285 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4286 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4287 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4288 		break;
4289 
4290 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4291 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4292 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4293 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4294 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4295 		break;
4296 
4297 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4298 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4299 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4300 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4301 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4302 		break;
4303 
4304 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4305 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4306 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4307 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4308 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4309 		break;
4310 
4311 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4312 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4313 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4314 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4315 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4316 		break;
4317 
4318 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4319 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4320 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4321 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4322 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4323 		break;
4324 
4325 	case GAUDI_QUEUE_ID_CPU_PQ:
4326 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4327 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4328 		else
4329 			invalid_queue = true;
4330 		break;
4331 
4332 	case GAUDI_QUEUE_ID_MME_0_0:
4333 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4334 		break;
4335 
4336 	case GAUDI_QUEUE_ID_MME_0_1:
4337 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4338 		break;
4339 
4340 	case GAUDI_QUEUE_ID_MME_0_2:
4341 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4342 		break;
4343 
4344 	case GAUDI_QUEUE_ID_MME_0_3:
4345 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4346 		break;
4347 
4348 	case GAUDI_QUEUE_ID_MME_1_0:
4349 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4350 		break;
4351 
4352 	case GAUDI_QUEUE_ID_MME_1_1:
4353 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4354 		break;
4355 
4356 	case GAUDI_QUEUE_ID_MME_1_2:
4357 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4358 		break;
4359 
4360 	case GAUDI_QUEUE_ID_MME_1_3:
4361 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4362 		break;
4363 
4364 	case GAUDI_QUEUE_ID_TPC_0_0:
4365 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4366 		break;
4367 
4368 	case GAUDI_QUEUE_ID_TPC_0_1:
4369 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4370 		break;
4371 
4372 	case GAUDI_QUEUE_ID_TPC_0_2:
4373 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4374 		break;
4375 
4376 	case GAUDI_QUEUE_ID_TPC_0_3:
4377 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4378 		break;
4379 
4380 	case GAUDI_QUEUE_ID_TPC_1_0:
4381 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4382 		break;
4383 
4384 	case GAUDI_QUEUE_ID_TPC_1_1:
4385 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4386 		break;
4387 
4388 	case GAUDI_QUEUE_ID_TPC_1_2:
4389 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4390 		break;
4391 
4392 	case GAUDI_QUEUE_ID_TPC_1_3:
4393 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4394 		break;
4395 
4396 	case GAUDI_QUEUE_ID_TPC_2_0:
4397 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4398 		break;
4399 
4400 	case GAUDI_QUEUE_ID_TPC_2_1:
4401 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4402 		break;
4403 
4404 	case GAUDI_QUEUE_ID_TPC_2_2:
4405 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4406 		break;
4407 
4408 	case GAUDI_QUEUE_ID_TPC_2_3:
4409 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4410 		break;
4411 
4412 	case GAUDI_QUEUE_ID_TPC_3_0:
4413 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4414 		break;
4415 
4416 	case GAUDI_QUEUE_ID_TPC_3_1:
4417 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4418 		break;
4419 
4420 	case GAUDI_QUEUE_ID_TPC_3_2:
4421 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4422 		break;
4423 
4424 	case GAUDI_QUEUE_ID_TPC_3_3:
4425 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4426 		break;
4427 
4428 	case GAUDI_QUEUE_ID_TPC_4_0:
4429 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4430 		break;
4431 
4432 	case GAUDI_QUEUE_ID_TPC_4_1:
4433 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4434 		break;
4435 
4436 	case GAUDI_QUEUE_ID_TPC_4_2:
4437 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4438 		break;
4439 
4440 	case GAUDI_QUEUE_ID_TPC_4_3:
4441 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4442 		break;
4443 
4444 	case GAUDI_QUEUE_ID_TPC_5_0:
4445 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4446 		break;
4447 
4448 	case GAUDI_QUEUE_ID_TPC_5_1:
4449 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4450 		break;
4451 
4452 	case GAUDI_QUEUE_ID_TPC_5_2:
4453 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4454 		break;
4455 
4456 	case GAUDI_QUEUE_ID_TPC_5_3:
4457 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4458 		break;
4459 
4460 	case GAUDI_QUEUE_ID_TPC_6_0:
4461 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4462 		break;
4463 
4464 	case GAUDI_QUEUE_ID_TPC_6_1:
4465 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4466 		break;
4467 
4468 	case GAUDI_QUEUE_ID_TPC_6_2:
4469 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4470 		break;
4471 
4472 	case GAUDI_QUEUE_ID_TPC_6_3:
4473 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4474 		break;
4475 
4476 	case GAUDI_QUEUE_ID_TPC_7_0:
4477 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4478 		break;
4479 
4480 	case GAUDI_QUEUE_ID_TPC_7_1:
4481 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4482 		break;
4483 
4484 	case GAUDI_QUEUE_ID_TPC_7_2:
4485 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4486 		break;
4487 
4488 	case GAUDI_QUEUE_ID_TPC_7_3:
4489 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4490 		break;
4491 
4492 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4493 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4494 			invalid_queue = true;
4495 
4496 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4497 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4498 		break;
4499 
4500 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4501 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4502 			invalid_queue = true;
4503 
4504 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4505 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4506 		break;
4507 
4508 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4509 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4510 			invalid_queue = true;
4511 
4512 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4513 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4514 		break;
4515 
4516 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4517 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4518 			invalid_queue = true;
4519 
4520 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4521 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4522 		break;
4523 
4524 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4525 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4526 			invalid_queue = true;
4527 
4528 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4529 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4530 		break;
4531 
4532 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4533 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4534 			invalid_queue = true;
4535 
4536 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4537 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4538 		break;
4539 
4540 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4541 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4542 			invalid_queue = true;
4543 
4544 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4545 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4546 		break;
4547 
4548 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4549 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4550 			invalid_queue = true;
4551 
4552 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4553 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4554 		break;
4555 
4556 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4557 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4558 			invalid_queue = true;
4559 
4560 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4561 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4562 		break;
4563 
4564 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4565 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4566 			invalid_queue = true;
4567 
4568 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4569 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4570 		break;
4571 
4572 	default:
4573 		invalid_queue = true;
4574 	}
4575 
4576 	if (invalid_queue) {
4577 		/* Should never get here */
4578 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4579 			hw_queue_id);
4580 		return;
4581 	}
4582 
4583 	db_value = pi;
4584 
4585 	/* ring the doorbell */
4586 	WREG32(db_reg_offset, db_value);
4587 
4588 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4589 		/* make sure device CPU will read latest data from host */
4590 		mb();
4591 
4592 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4593 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4594 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4595 
4596 		WREG32(irq_handler_offset,
4597 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4598 	}
4599 }
4600 
4601 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4602 				struct hl_bd *bd)
4603 {
4604 	__le64 *pbd = (__le64 *) bd;
4605 
4606 	/* The QMANs are on the host memory so a simple copy suffice */
4607 	pqe[0] = pbd[0];
4608 	pqe[1] = pbd[1];
4609 }
4610 
4611 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4612 					dma_addr_t *dma_handle, gfp_t flags)
4613 {
4614 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4615 						dma_handle, flags);
4616 
4617 	/* Shift to the device's base physical address of host memory */
4618 	if (kernel_addr)
4619 		*dma_handle += HOST_PHYS_BASE;
4620 
4621 	return kernel_addr;
4622 }
4623 
4624 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4625 		void *cpu_addr, dma_addr_t dma_handle)
4626 {
4627 	/* Cancel the device's base physical address of host memory */
4628 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4629 
4630 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4631 }
4632 
4633 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4634 {
4635 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4636 	u64 cur_addr = prop->dram_user_base_address;
4637 	u32 chunk_size, busy;
4638 	int rc, dma_id;
4639 
4640 	while (cur_addr < prop->dram_end_address) {
4641 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4642 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4643 
4644 			chunk_size =
4645 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4646 
4647 			dev_dbg(hdev->dev,
4648 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4649 				cur_addr, cur_addr + chunk_size);
4650 
4651 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4652 					lower_32_bits(val));
4653 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4654 					upper_32_bits(val));
4655 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4656 						lower_32_bits(cur_addr));
4657 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4658 						upper_32_bits(cur_addr));
4659 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4660 					chunk_size);
4661 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4662 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4663 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4664 
4665 			cur_addr += chunk_size;
4666 
4667 			if (cur_addr == prop->dram_end_address)
4668 				break;
4669 		}
4670 
4671 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4672 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4673 
4674 			rc = hl_poll_timeout(
4675 				hdev,
4676 				mmDMA0_CORE_STS0 + dma_offset,
4677 				busy,
4678 				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4679 				1000,
4680 				HBM_SCRUBBING_TIMEOUT_US);
4681 
4682 			if (rc) {
4683 				dev_err(hdev->dev,
4684 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4685 					dma_id);
4686 				return -EIO;
4687 			}
4688 		}
4689 	}
4690 
4691 	return 0;
4692 }
4693 
4694 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4695 {
4696 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4697 	u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4698 			min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4699 	u64 addr, size, val = hdev->memory_scrub_val;
4700 	ktime_t timeout;
4701 	int rc = 0;
4702 
4703 	if (!hdev->memory_scrub)
4704 		return 0;
4705 
4706 	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4707 	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4708 		if (ktime_compare(ktime_get(), timeout) > 0) {
4709 			dev_err(hdev->dev, "waiting for idle timeout\n");
4710 			return -ETIMEDOUT;
4711 		}
4712 		usleep_range((1000 >> 2) + 1, 1000);
4713 	}
4714 
4715 	/* Scrub SRAM */
4716 	addr = prop->sram_user_base_address;
4717 	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4718 
4719 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4720 			addr, addr + size, val);
4721 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4722 	if (rc) {
4723 		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4724 		return rc;
4725 	}
4726 
4727 	/* Scrub HBM using all DMA channels in parallel */
4728 	rc = gaudi_scrub_device_dram(hdev, val);
4729 	if (rc) {
4730 		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4731 		return rc;
4732 	}
4733 
4734 	return 0;
4735 }
4736 
4737 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4738 				u32 queue_id, dma_addr_t *dma_handle,
4739 				u16 *queue_len)
4740 {
4741 	struct gaudi_device *gaudi = hdev->asic_specific;
4742 	struct gaudi_internal_qman_info *q;
4743 
4744 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4745 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4746 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4747 		return NULL;
4748 	}
4749 
4750 	q = &gaudi->internal_qmans[queue_id];
4751 	*dma_handle = q->pq_dma_addr;
4752 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4753 
4754 	return q->pq_kernel_addr;
4755 }
4756 
4757 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4758 				u16 len, u32 timeout, u64 *result)
4759 {
4760 	struct gaudi_device *gaudi = hdev->asic_specific;
4761 
4762 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4763 		if (result)
4764 			*result = 0;
4765 		return 0;
4766 	}
4767 
4768 	if (!timeout)
4769 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4770 
4771 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4772 						timeout, result);
4773 }
4774 
4775 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4776 {
4777 	struct packet_msg_prot *fence_pkt;
4778 	dma_addr_t pkt_dma_addr;
4779 	u32 fence_val, tmp, timeout_usec;
4780 	dma_addr_t fence_dma_addr;
4781 	u32 *fence_ptr;
4782 	int rc;
4783 
4784 	if (hdev->pldm)
4785 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4786 	else
4787 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4788 
4789 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4790 
4791 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4792 	if (!fence_ptr) {
4793 		dev_err(hdev->dev,
4794 			"Failed to allocate memory for H/W queue %d testing\n",
4795 			hw_queue_id);
4796 		return -ENOMEM;
4797 	}
4798 
4799 	*fence_ptr = 0;
4800 
4801 	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4802 						&pkt_dma_addr);
4803 	if (!fence_pkt) {
4804 		dev_err(hdev->dev,
4805 			"Failed to allocate packet for H/W queue %d testing\n",
4806 			hw_queue_id);
4807 		rc = -ENOMEM;
4808 		goto free_fence_ptr;
4809 	}
4810 
4811 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4812 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4813 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4814 
4815 	fence_pkt->ctl = cpu_to_le32(tmp);
4816 	fence_pkt->value = cpu_to_le32(fence_val);
4817 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4818 
4819 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4820 					sizeof(struct packet_msg_prot),
4821 					pkt_dma_addr);
4822 	if (rc) {
4823 		dev_err(hdev->dev,
4824 			"Failed to send fence packet to H/W queue %d\n",
4825 			hw_queue_id);
4826 		goto free_pkt;
4827 	}
4828 
4829 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4830 					1000, timeout_usec, true);
4831 
4832 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4833 
4834 	if (rc == -ETIMEDOUT) {
4835 		dev_err(hdev->dev,
4836 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4837 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4838 		rc = -EIO;
4839 	}
4840 
4841 free_pkt:
4842 	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4843 free_fence_ptr:
4844 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4845 	return rc;
4846 }
4847 
4848 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4849 {
4850 	struct gaudi_device *gaudi = hdev->asic_specific;
4851 
4852 	/*
4853 	 * check capability here as send_cpu_message() won't update the result
4854 	 * value if no capability
4855 	 */
4856 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4857 		return 0;
4858 
4859 	return hl_fw_test_cpu_queue(hdev);
4860 }
4861 
4862 static int gaudi_test_queues(struct hl_device *hdev)
4863 {
4864 	int i, rc, ret_val = 0;
4865 
4866 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4867 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4868 			rc = gaudi_test_queue(hdev, i);
4869 			if (rc)
4870 				ret_val = -EINVAL;
4871 		}
4872 	}
4873 
4874 	rc = gaudi_test_cpu_queue(hdev);
4875 	if (rc)
4876 		ret_val = -EINVAL;
4877 
4878 	return ret_val;
4879 }
4880 
4881 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4882 		gfp_t mem_flags, dma_addr_t *dma_handle)
4883 {
4884 	void *kernel_addr;
4885 
4886 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4887 		return NULL;
4888 
4889 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4890 
4891 	/* Shift to the device's base physical address of host memory */
4892 	if (kernel_addr)
4893 		*dma_handle += HOST_PHYS_BASE;
4894 
4895 	return kernel_addr;
4896 }
4897 
4898 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4899 			dma_addr_t dma_addr)
4900 {
4901 	/* Cancel the device's base physical address of host memory */
4902 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4903 
4904 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4905 }
4906 
4907 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4908 					size_t size, dma_addr_t *dma_handle)
4909 {
4910 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4911 }
4912 
4913 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4914 						size_t size, void *vaddr)
4915 {
4916 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4917 }
4918 
4919 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4920 {
4921 	struct scatterlist *sg, *sg_next_iter;
4922 	u32 count, dma_desc_cnt;
4923 	u64 len, len_next;
4924 	dma_addr_t addr, addr_next;
4925 
4926 	dma_desc_cnt = 0;
4927 
4928 	for_each_sgtable_dma_sg(sgt, sg, count) {
4929 		len = sg_dma_len(sg);
4930 		addr = sg_dma_address(sg);
4931 
4932 		if (len == 0)
4933 			break;
4934 
4935 		while ((count + 1) < sgt->nents) {
4936 			sg_next_iter = sg_next(sg);
4937 			len_next = sg_dma_len(sg_next_iter);
4938 			addr_next = sg_dma_address(sg_next_iter);
4939 
4940 			if (len_next == 0)
4941 				break;
4942 
4943 			if ((addr + len == addr_next) &&
4944 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4945 				len += len_next;
4946 				count++;
4947 				sg = sg_next_iter;
4948 			} else {
4949 				break;
4950 			}
4951 		}
4952 
4953 		dma_desc_cnt++;
4954 	}
4955 
4956 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4957 }
4958 
4959 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4960 				struct hl_cs_parser *parser,
4961 				struct packet_lin_dma *user_dma_pkt,
4962 				u64 addr, enum dma_data_direction dir)
4963 {
4964 	struct hl_userptr *userptr;
4965 	int rc;
4966 
4967 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4968 			parser->job_userptr_list, &userptr))
4969 		goto already_pinned;
4970 
4971 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4972 	if (!userptr)
4973 		return -ENOMEM;
4974 
4975 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4976 				userptr);
4977 	if (rc)
4978 		goto free_userptr;
4979 
4980 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4981 
4982 	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4983 	if (rc) {
4984 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4985 		goto unpin_memory;
4986 	}
4987 
4988 	userptr->dma_mapped = true;
4989 	userptr->dir = dir;
4990 
4991 already_pinned:
4992 	parser->patched_cb_size +=
4993 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4994 
4995 	return 0;
4996 
4997 unpin_memory:
4998 	list_del(&userptr->job_node);
4999 	hl_unpin_host_memory(hdev, userptr);
5000 free_userptr:
5001 	kfree(userptr);
5002 	return rc;
5003 }
5004 
5005 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5006 				struct hl_cs_parser *parser,
5007 				struct packet_lin_dma *user_dma_pkt,
5008 				bool src_in_host)
5009 {
5010 	enum dma_data_direction dir;
5011 	bool skip_host_mem_pin = false, user_memset;
5012 	u64 addr;
5013 	int rc = 0;
5014 
5015 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5016 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5017 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5018 
5019 	if (src_in_host) {
5020 		if (user_memset)
5021 			skip_host_mem_pin = true;
5022 
5023 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5024 		dir = DMA_TO_DEVICE;
5025 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5026 	} else {
5027 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5028 		dir = DMA_FROM_DEVICE;
5029 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5030 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5031 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5032 	}
5033 
5034 	if (skip_host_mem_pin)
5035 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5036 	else
5037 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5038 						addr, dir);
5039 
5040 	return rc;
5041 }
5042 
5043 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5044 				struct hl_cs_parser *parser,
5045 				struct packet_lin_dma *user_dma_pkt)
5046 {
5047 	bool src_in_host = false;
5048 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5049 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5050 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5051 
5052 	dev_dbg(hdev->dev, "DMA packet details:\n");
5053 	dev_dbg(hdev->dev, "source == 0x%llx\n",
5054 				le64_to_cpu(user_dma_pkt->src_addr));
5055 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5056 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5057 
5058 	/*
5059 	 * Special handling for DMA with size 0. Bypass all validations
5060 	 * because no transactions will be done except for WR_COMP, which
5061 	 * is not a security issue
5062 	 */
5063 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
5064 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5065 		return 0;
5066 	}
5067 
5068 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5069 		src_in_host = true;
5070 
5071 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5072 						src_in_host);
5073 }
5074 
5075 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5076 					struct hl_cs_parser *parser,
5077 					struct packet_load_and_exe *user_pkt)
5078 {
5079 	u32 cfg;
5080 
5081 	cfg = le32_to_cpu(user_pkt->cfg);
5082 
5083 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5084 		dev_err(hdev->dev,
5085 			"User not allowed to use Load and Execute\n");
5086 		return -EPERM;
5087 	}
5088 
5089 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5090 
5091 	return 0;
5092 }
5093 
5094 static int gaudi_validate_cb(struct hl_device *hdev,
5095 			struct hl_cs_parser *parser, bool is_mmu)
5096 {
5097 	u32 cb_parsed_length = 0;
5098 	int rc = 0;
5099 
5100 	parser->patched_cb_size = 0;
5101 
5102 	/* cb_user_size is more than 0 so loop will always be executed */
5103 	while (cb_parsed_length < parser->user_cb_size) {
5104 		enum packet_id pkt_id;
5105 		u16 pkt_size;
5106 		struct gaudi_packet *user_pkt;
5107 
5108 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5109 
5110 		pkt_id = (enum packet_id) (
5111 				(le64_to_cpu(user_pkt->header) &
5112 				PACKET_HEADER_PACKET_ID_MASK) >>
5113 					PACKET_HEADER_PACKET_ID_SHIFT);
5114 
5115 		if (!validate_packet_id(pkt_id)) {
5116 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5117 			rc = -EINVAL;
5118 			break;
5119 		}
5120 
5121 		pkt_size = gaudi_packet_sizes[pkt_id];
5122 		cb_parsed_length += pkt_size;
5123 		if (cb_parsed_length > parser->user_cb_size) {
5124 			dev_err(hdev->dev,
5125 				"packet 0x%x is out of CB boundary\n", pkt_id);
5126 			rc = -EINVAL;
5127 			break;
5128 		}
5129 
5130 		switch (pkt_id) {
5131 		case PACKET_MSG_PROT:
5132 			dev_err(hdev->dev,
5133 				"User not allowed to use MSG_PROT\n");
5134 			rc = -EPERM;
5135 			break;
5136 
5137 		case PACKET_CP_DMA:
5138 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5139 			rc = -EPERM;
5140 			break;
5141 
5142 		case PACKET_STOP:
5143 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5144 			rc = -EPERM;
5145 			break;
5146 
5147 		case PACKET_WREG_BULK:
5148 			dev_err(hdev->dev,
5149 				"User not allowed to use WREG_BULK\n");
5150 			rc = -EPERM;
5151 			break;
5152 
5153 		case PACKET_LOAD_AND_EXE:
5154 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5155 				(struct packet_load_and_exe *) user_pkt);
5156 			break;
5157 
5158 		case PACKET_LIN_DMA:
5159 			parser->contains_dma_pkt = true;
5160 			if (is_mmu)
5161 				parser->patched_cb_size += pkt_size;
5162 			else
5163 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5164 					(struct packet_lin_dma *) user_pkt);
5165 			break;
5166 
5167 		case PACKET_WREG_32:
5168 		case PACKET_MSG_LONG:
5169 		case PACKET_MSG_SHORT:
5170 		case PACKET_REPEAT:
5171 		case PACKET_FENCE:
5172 		case PACKET_NOP:
5173 		case PACKET_ARB_POINT:
5174 			parser->patched_cb_size += pkt_size;
5175 			break;
5176 
5177 		default:
5178 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5179 				pkt_id);
5180 			rc = -EINVAL;
5181 			break;
5182 		}
5183 
5184 		if (rc)
5185 			break;
5186 	}
5187 
5188 	/*
5189 	 * The new CB should have space at the end for two MSG_PROT packets:
5190 	 * 1. Optional NOP padding for cacheline alignment
5191 	 * 2. A packet that will act as a completion packet
5192 	 * 3. A packet that will generate MSI interrupt
5193 	 */
5194 	if (parser->completion)
5195 		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5196 			parser->patched_cb_size);
5197 
5198 	return rc;
5199 }
5200 
5201 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5202 				struct hl_cs_parser *parser,
5203 				struct packet_lin_dma *user_dma_pkt,
5204 				struct packet_lin_dma *new_dma_pkt,
5205 				u32 *new_dma_pkt_size)
5206 {
5207 	struct hl_userptr *userptr;
5208 	struct scatterlist *sg, *sg_next_iter;
5209 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5210 	u64 len, len_next;
5211 	dma_addr_t dma_addr, dma_addr_next;
5212 	u64 device_memory_addr, addr;
5213 	enum dma_data_direction dir;
5214 	struct sg_table *sgt;
5215 	bool src_in_host = false;
5216 	bool skip_host_mem_pin = false;
5217 	bool user_memset;
5218 
5219 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5220 
5221 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5222 		src_in_host = true;
5223 
5224 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5225 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5226 
5227 	if (src_in_host) {
5228 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5229 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5230 		dir = DMA_TO_DEVICE;
5231 		if (user_memset)
5232 			skip_host_mem_pin = true;
5233 	} else {
5234 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5235 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5236 		dir = DMA_FROM_DEVICE;
5237 	}
5238 
5239 	if ((!skip_host_mem_pin) &&
5240 		(!hl_userptr_is_pinned(hdev, addr,
5241 					le32_to_cpu(user_dma_pkt->tsize),
5242 					parser->job_userptr_list, &userptr))) {
5243 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5244 				addr, user_dma_pkt->tsize);
5245 		return -EFAULT;
5246 	}
5247 
5248 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5249 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5250 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5251 		return 0;
5252 	}
5253 
5254 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5255 
5256 	sgt = userptr->sgt;
5257 	dma_desc_cnt = 0;
5258 
5259 	for_each_sgtable_dma_sg(sgt, sg, count) {
5260 		len = sg_dma_len(sg);
5261 		dma_addr = sg_dma_address(sg);
5262 
5263 		if (len == 0)
5264 			break;
5265 
5266 		while ((count + 1) < sgt->nents) {
5267 			sg_next_iter = sg_next(sg);
5268 			len_next = sg_dma_len(sg_next_iter);
5269 			dma_addr_next = sg_dma_address(sg_next_iter);
5270 
5271 			if (len_next == 0)
5272 				break;
5273 
5274 			if ((dma_addr + len == dma_addr_next) &&
5275 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5276 				len += len_next;
5277 				count++;
5278 				sg = sg_next_iter;
5279 			} else {
5280 				break;
5281 			}
5282 		}
5283 
5284 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5285 		if (likely(dma_desc_cnt))
5286 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5287 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5288 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5289 		new_dma_pkt->tsize = cpu_to_le32(len);
5290 
5291 		if (dir == DMA_TO_DEVICE) {
5292 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5293 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5294 		} else {
5295 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5296 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5297 		}
5298 
5299 		if (!user_memset)
5300 			device_memory_addr += len;
5301 		dma_desc_cnt++;
5302 		new_dma_pkt++;
5303 	}
5304 
5305 	if (!dma_desc_cnt) {
5306 		dev_err(hdev->dev,
5307 			"Error of 0 SG entries when patching DMA packet\n");
5308 		return -EFAULT;
5309 	}
5310 
5311 	/* Fix the last dma packet - wrcomp must be as user set it */
5312 	new_dma_pkt--;
5313 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5314 
5315 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5316 
5317 	return 0;
5318 }
5319 
5320 static int gaudi_patch_cb(struct hl_device *hdev,
5321 				struct hl_cs_parser *parser)
5322 {
5323 	u32 cb_parsed_length = 0;
5324 	u32 cb_patched_cur_length = 0;
5325 	int rc = 0;
5326 
5327 	/* cb_user_size is more than 0 so loop will always be executed */
5328 	while (cb_parsed_length < parser->user_cb_size) {
5329 		enum packet_id pkt_id;
5330 		u16 pkt_size;
5331 		u32 new_pkt_size = 0;
5332 		struct gaudi_packet *user_pkt, *kernel_pkt;
5333 
5334 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5335 		kernel_pkt = parser->patched_cb->kernel_address +
5336 					cb_patched_cur_length;
5337 
5338 		pkt_id = (enum packet_id) (
5339 				(le64_to_cpu(user_pkt->header) &
5340 				PACKET_HEADER_PACKET_ID_MASK) >>
5341 					PACKET_HEADER_PACKET_ID_SHIFT);
5342 
5343 		if (!validate_packet_id(pkt_id)) {
5344 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5345 			rc = -EINVAL;
5346 			break;
5347 		}
5348 
5349 		pkt_size = gaudi_packet_sizes[pkt_id];
5350 		cb_parsed_length += pkt_size;
5351 		if (cb_parsed_length > parser->user_cb_size) {
5352 			dev_err(hdev->dev,
5353 				"packet 0x%x is out of CB boundary\n", pkt_id);
5354 			rc = -EINVAL;
5355 			break;
5356 		}
5357 
5358 		switch (pkt_id) {
5359 		case PACKET_LIN_DMA:
5360 			rc = gaudi_patch_dma_packet(hdev, parser,
5361 					(struct packet_lin_dma *) user_pkt,
5362 					(struct packet_lin_dma *) kernel_pkt,
5363 					&new_pkt_size);
5364 			cb_patched_cur_length += new_pkt_size;
5365 			break;
5366 
5367 		case PACKET_MSG_PROT:
5368 			dev_err(hdev->dev,
5369 				"User not allowed to use MSG_PROT\n");
5370 			rc = -EPERM;
5371 			break;
5372 
5373 		case PACKET_CP_DMA:
5374 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5375 			rc = -EPERM;
5376 			break;
5377 
5378 		case PACKET_STOP:
5379 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5380 			rc = -EPERM;
5381 			break;
5382 
5383 		case PACKET_WREG_32:
5384 		case PACKET_WREG_BULK:
5385 		case PACKET_MSG_LONG:
5386 		case PACKET_MSG_SHORT:
5387 		case PACKET_REPEAT:
5388 		case PACKET_FENCE:
5389 		case PACKET_NOP:
5390 		case PACKET_ARB_POINT:
5391 		case PACKET_LOAD_AND_EXE:
5392 			memcpy(kernel_pkt, user_pkt, pkt_size);
5393 			cb_patched_cur_length += pkt_size;
5394 			break;
5395 
5396 		default:
5397 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5398 				pkt_id);
5399 			rc = -EINVAL;
5400 			break;
5401 		}
5402 
5403 		if (rc)
5404 			break;
5405 	}
5406 
5407 	return rc;
5408 }
5409 
5410 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5411 		struct hl_cs_parser *parser)
5412 {
5413 	u64 handle;
5414 	u32 patched_cb_size;
5415 	struct hl_cb *user_cb;
5416 	int rc;
5417 
5418 	/*
5419 	 * The new CB should have space at the end for two MSG_PROT packets:
5420 	 * 1. Optional NOP padding for cacheline alignment
5421 	 * 2. A packet that will act as a completion packet
5422 	 * 3. A packet that will generate MSI interrupt
5423 	 */
5424 	if (parser->completion)
5425 		parser->patched_cb_size = parser->user_cb_size +
5426 				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5427 	else
5428 		parser->patched_cb_size = parser->user_cb_size;
5429 
5430 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5431 				parser->patched_cb_size, false, false,
5432 				&handle);
5433 
5434 	if (rc) {
5435 		dev_err(hdev->dev,
5436 			"Failed to allocate patched CB for DMA CS %d\n",
5437 			rc);
5438 		return rc;
5439 	}
5440 
5441 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5442 	/* hl_cb_get should never fail */
5443 	if (!parser->patched_cb) {
5444 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5445 		rc = -EFAULT;
5446 		goto out;
5447 	}
5448 
5449 	/*
5450 	 * We are protected from overflow because the check
5451 	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5452 	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5453 	 *
5454 	 * There is no option to reach here without going through that check because:
5455 	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5456 	 *    an external queue.
5457 	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5458 	 */
5459 	memcpy(parser->patched_cb->kernel_address,
5460 		parser->user_cb->kernel_address,
5461 		parser->user_cb_size);
5462 
5463 	patched_cb_size = parser->patched_cb_size;
5464 
5465 	/* Validate patched CB instead of user CB */
5466 	user_cb = parser->user_cb;
5467 	parser->user_cb = parser->patched_cb;
5468 	rc = gaudi_validate_cb(hdev, parser, true);
5469 	parser->user_cb = user_cb;
5470 
5471 	if (rc) {
5472 		hl_cb_put(parser->patched_cb);
5473 		goto out;
5474 	}
5475 
5476 	if (patched_cb_size != parser->patched_cb_size) {
5477 		dev_err(hdev->dev, "user CB size mismatch\n");
5478 		hl_cb_put(parser->patched_cb);
5479 		rc = -EINVAL;
5480 		goto out;
5481 	}
5482 
5483 out:
5484 	/*
5485 	 * Always call cb destroy here because we still have 1 reference
5486 	 * to it by calling cb_get earlier. After the job will be completed,
5487 	 * cb_put will release it, but here we want to remove it from the
5488 	 * idr
5489 	 */
5490 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5491 
5492 	return rc;
5493 }
5494 
5495 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5496 		struct hl_cs_parser *parser)
5497 {
5498 	u64 handle;
5499 	int rc;
5500 
5501 	rc = gaudi_validate_cb(hdev, parser, false);
5502 
5503 	if (rc)
5504 		goto free_userptr;
5505 
5506 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5507 				parser->patched_cb_size, false, false,
5508 				&handle);
5509 	if (rc) {
5510 		dev_err(hdev->dev,
5511 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5512 		goto free_userptr;
5513 	}
5514 
5515 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5516 	/* hl_cb_get should never fail here */
5517 	if (!parser->patched_cb) {
5518 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5519 		rc = -EFAULT;
5520 		goto out;
5521 	}
5522 
5523 	rc = gaudi_patch_cb(hdev, parser);
5524 
5525 	if (rc)
5526 		hl_cb_put(parser->patched_cb);
5527 
5528 out:
5529 	/*
5530 	 * Always call cb destroy here because we still have 1 reference
5531 	 * to it by calling cb_get earlier. After the job will be completed,
5532 	 * cb_put will release it, but here we want to remove it from the
5533 	 * idr
5534 	 */
5535 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5536 
5537 free_userptr:
5538 	if (rc)
5539 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5540 	return rc;
5541 }
5542 
5543 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5544 					struct hl_cs_parser *parser)
5545 {
5546 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5547 	struct gaudi_device *gaudi = hdev->asic_specific;
5548 	u32 nic_queue_offset, nic_mask_q_id;
5549 
5550 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5551 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5552 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5553 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5554 
5555 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5556 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5557 			return -EINVAL;
5558 		}
5559 	}
5560 
5561 	/* For internal queue jobs just check if CB address is valid */
5562 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5563 					parser->user_cb_size,
5564 					asic_prop->sram_user_base_address,
5565 					asic_prop->sram_end_address))
5566 		return 0;
5567 
5568 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5569 					parser->user_cb_size,
5570 					asic_prop->dram_user_base_address,
5571 					asic_prop->dram_end_address))
5572 		return 0;
5573 
5574 	/* PMMU and HPMMU addresses are equal, check only one of them */
5575 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5576 					parser->user_cb_size,
5577 					asic_prop->pmmu.start_addr,
5578 					asic_prop->pmmu.end_addr))
5579 		return 0;
5580 
5581 	dev_err(hdev->dev,
5582 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5583 		parser->user_cb, parser->user_cb_size);
5584 
5585 	return -EFAULT;
5586 }
5587 
5588 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5589 {
5590 	struct gaudi_device *gaudi = hdev->asic_specific;
5591 
5592 	if (parser->queue_type == QUEUE_TYPE_INT)
5593 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5594 
5595 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5596 		return gaudi_parse_cb_mmu(hdev, parser);
5597 	else
5598 		return gaudi_parse_cb_no_mmu(hdev, parser);
5599 }
5600 
5601 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5602 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5603 				u32 msi_vec, bool eb)
5604 {
5605 	struct gaudi_device *gaudi = hdev->asic_specific;
5606 	struct packet_msg_prot *cq_pkt;
5607 	struct packet_nop *cq_padding;
5608 	u64 msi_addr;
5609 	u32 tmp;
5610 
5611 	cq_padding = kernel_address + original_len;
5612 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5613 
5614 	while ((void *)cq_padding < (void *)cq_pkt) {
5615 		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5616 		cq_padding++;
5617 	}
5618 
5619 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5620 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5621 
5622 	if (eb)
5623 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5624 
5625 	cq_pkt->ctl = cpu_to_le32(tmp);
5626 	cq_pkt->value = cpu_to_le32(cq_val);
5627 	cq_pkt->addr = cpu_to_le64(cq_addr);
5628 
5629 	cq_pkt++;
5630 
5631 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5632 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5633 	cq_pkt->ctl = cpu_to_le32(tmp);
5634 	cq_pkt->value = cpu_to_le32(1);
5635 
5636 	if (gaudi->multi_msi_mode)
5637 		msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5638 	else
5639 		msi_addr = mmPCIE_CORE_MSI_REQ;
5640 
5641 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5642 }
5643 
5644 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5645 {
5646 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5647 }
5648 
5649 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5650 					u32 size, u64 val)
5651 {
5652 	struct packet_lin_dma *lin_dma_pkt;
5653 	struct hl_cs_job *job;
5654 	u32 cb_size, ctl, err_cause;
5655 	struct hl_cb *cb;
5656 	int rc;
5657 
5658 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5659 	if (!cb)
5660 		return -EFAULT;
5661 
5662 	lin_dma_pkt = cb->kernel_address;
5663 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5664 	cb_size = sizeof(*lin_dma_pkt);
5665 
5666 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5667 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5668 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5669 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5670 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5671 
5672 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5673 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5674 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5675 	lin_dma_pkt->tsize = cpu_to_le32(size);
5676 
5677 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5678 	if (!job) {
5679 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5680 		rc = -ENOMEM;
5681 		goto release_cb;
5682 	}
5683 
5684 	/* Verify DMA is OK */
5685 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5686 	if (err_cause && !hdev->init_done) {
5687 		dev_dbg(hdev->dev,
5688 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5689 			err_cause);
5690 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5691 	}
5692 
5693 	job->id = 0;
5694 	job->user_cb = cb;
5695 	atomic_inc(&job->user_cb->cs_cnt);
5696 	job->user_cb_size = cb_size;
5697 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5698 	job->patched_cb = job->user_cb;
5699 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5700 
5701 	hl_debugfs_add_job(hdev, job);
5702 
5703 	rc = gaudi_send_job_on_qman0(hdev, job);
5704 	hl_debugfs_remove_job(hdev, job);
5705 	kfree(job);
5706 	atomic_dec(&cb->cs_cnt);
5707 
5708 	/* Verify DMA is OK */
5709 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5710 	if (err_cause) {
5711 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5712 		rc = -EIO;
5713 		if (!hdev->init_done) {
5714 			dev_dbg(hdev->dev,
5715 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5716 				err_cause);
5717 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5718 		}
5719 	}
5720 
5721 release_cb:
5722 	hl_cb_put(cb);
5723 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5724 
5725 	return rc;
5726 }
5727 
5728 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5729 					u32 num_regs, u32 val)
5730 {
5731 	struct packet_msg_long *pkt;
5732 	struct hl_cs_job *job;
5733 	u32 cb_size, ctl;
5734 	struct hl_cb *cb;
5735 	int i, rc;
5736 
5737 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5738 
5739 	if (cb_size > SZ_2M) {
5740 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5741 		return -ENOMEM;
5742 	}
5743 
5744 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5745 	if (!cb)
5746 		return -EFAULT;
5747 
5748 	pkt = cb->kernel_address;
5749 
5750 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5751 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5752 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5753 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5754 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5755 
5756 	for (i = 0; i < num_regs ; i++, pkt++) {
5757 		pkt->ctl = cpu_to_le32(ctl);
5758 		pkt->value = cpu_to_le32(val);
5759 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5760 	}
5761 
5762 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5763 	if (!job) {
5764 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5765 		rc = -ENOMEM;
5766 		goto release_cb;
5767 	}
5768 
5769 	job->id = 0;
5770 	job->user_cb = cb;
5771 	atomic_inc(&job->user_cb->cs_cnt);
5772 	job->user_cb_size = cb_size;
5773 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5774 	job->patched_cb = job->user_cb;
5775 	job->job_cb_size = cb_size;
5776 
5777 	hl_debugfs_add_job(hdev, job);
5778 
5779 	rc = gaudi_send_job_on_qman0(hdev, job);
5780 	hl_debugfs_remove_job(hdev, job);
5781 	kfree(job);
5782 	atomic_dec(&cb->cs_cnt);
5783 
5784 release_cb:
5785 	hl_cb_put(cb);
5786 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5787 
5788 	return rc;
5789 }
5790 
5791 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5792 {
5793 	u64 base_addr;
5794 	u32 num_regs;
5795 	int rc;
5796 
5797 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5798 	num_regs = NUM_OF_SOB_IN_BLOCK;
5799 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5800 	if (rc) {
5801 		dev_err(hdev->dev, "failed resetting SM registers");
5802 		return -ENOMEM;
5803 	}
5804 
5805 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5806 	num_regs = NUM_OF_SOB_IN_BLOCK;
5807 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5808 	if (rc) {
5809 		dev_err(hdev->dev, "failed resetting SM registers");
5810 		return -ENOMEM;
5811 	}
5812 
5813 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5814 	num_regs = NUM_OF_SOB_IN_BLOCK;
5815 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5816 	if (rc) {
5817 		dev_err(hdev->dev, "failed resetting SM registers");
5818 		return -ENOMEM;
5819 	}
5820 
5821 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5822 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5823 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5824 	if (rc) {
5825 		dev_err(hdev->dev, "failed resetting SM registers");
5826 		return -ENOMEM;
5827 	}
5828 
5829 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5830 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5831 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5832 	if (rc) {
5833 		dev_err(hdev->dev, "failed resetting SM registers");
5834 		return -ENOMEM;
5835 	}
5836 
5837 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5838 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5839 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5840 	if (rc) {
5841 		dev_err(hdev->dev, "failed resetting SM registers");
5842 		return -ENOMEM;
5843 	}
5844 
5845 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5846 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5847 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5848 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5849 	if (rc) {
5850 		dev_err(hdev->dev, "failed resetting SM registers");
5851 		return -ENOMEM;
5852 	}
5853 
5854 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5855 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5856 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5857 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5858 	if (rc) {
5859 		dev_err(hdev->dev, "failed resetting SM registers");
5860 		return -ENOMEM;
5861 	}
5862 
5863 	return 0;
5864 }
5865 
5866 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5867 {
5868 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5869 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5870 	int i;
5871 
5872 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5873 		u64 sob_addr = CFG_BASE +
5874 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5875 				(i * sob_delta);
5876 		u32 dma_offset = i * DMA_CORE_OFFSET;
5877 
5878 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5879 				lower_32_bits(sob_addr));
5880 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5881 				upper_32_bits(sob_addr));
5882 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5883 
5884 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5885 		 * modified by the user for SRAM reduction
5886 		 */
5887 		if (i > 1)
5888 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5889 								0x00000001);
5890 	}
5891 }
5892 
5893 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5894 {
5895 	u32 qman_offset;
5896 	int i;
5897 
5898 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5899 		qman_offset = i * DMA_QMAN_OFFSET;
5900 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5901 	}
5902 
5903 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5904 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5905 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5906 	}
5907 
5908 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5909 		qman_offset = i * TPC_QMAN_OFFSET;
5910 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5911 	}
5912 
5913 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5914 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5915 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5916 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5917 	}
5918 }
5919 
5920 static int gaudi_restore_user_registers(struct hl_device *hdev)
5921 {
5922 	int rc;
5923 
5924 	rc = gaudi_restore_sm_registers(hdev);
5925 	if (rc)
5926 		return rc;
5927 
5928 	gaudi_restore_dma_registers(hdev);
5929 	gaudi_restore_qm_registers(hdev);
5930 
5931 	return 0;
5932 }
5933 
5934 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5935 {
5936 	return 0;
5937 }
5938 
5939 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5940 {
5941 	u32 size = hdev->asic_prop.mmu_pgt_size +
5942 			hdev->asic_prop.mmu_cache_mng_size;
5943 	struct gaudi_device *gaudi = hdev->asic_specific;
5944 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5945 
5946 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5947 		return 0;
5948 
5949 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5950 }
5951 
5952 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5953 {
5954 
5955 }
5956 
5957 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5958 					u32 size_to_dma, dma_addr_t dma_addr)
5959 {
5960 	u32 err_cause, val;
5961 	u64 dma_offset;
5962 	int rc;
5963 
5964 	dma_offset = dma_id * DMA_CORE_OFFSET;
5965 
5966 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5967 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5968 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5969 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5970 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5971 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5972 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5973 
5974 	rc = hl_poll_timeout(
5975 		hdev,
5976 		mmDMA0_CORE_STS0 + dma_offset,
5977 		val,
5978 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5979 		0,
5980 		1000000);
5981 
5982 	if (rc) {
5983 		dev_err(hdev->dev,
5984 			"DMA %d timed-out during reading of 0x%llx\n",
5985 			dma_id, addr);
5986 		return -EIO;
5987 	}
5988 
5989 	/* Verify DMA is OK */
5990 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5991 	if (err_cause) {
5992 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5993 		dev_dbg(hdev->dev,
5994 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5995 			err_cause);
5996 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5997 
5998 		return -EIO;
5999 	}
6000 
6001 	return 0;
6002 }
6003 
6004 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6005 				void *blob_addr)
6006 {
6007 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6008 	u32 qm_glbl_sts0, qm_cgm_sts;
6009 	u64 dma_offset, qm_offset;
6010 	dma_addr_t dma_addr;
6011 	void *kernel_addr;
6012 	bool is_eng_idle;
6013 	int rc = 0, dma_id;
6014 
6015 	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
6016 
6017 	if (!kernel_addr)
6018 		return -ENOMEM;
6019 
6020 	hdev->asic_funcs->hw_queues_lock(hdev);
6021 
6022 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6023 	dma_offset = dma_id * DMA_CORE_OFFSET;
6024 	qm_offset = dma_id * DMA_QMAN_OFFSET;
6025 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6026 	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6027 	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6028 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6029 		      IS_DMA_IDLE(dma_core_sts0);
6030 
6031 	if (!is_eng_idle) {
6032 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6033 		dma_offset = dma_id * DMA_CORE_OFFSET;
6034 		qm_offset = dma_id * DMA_QMAN_OFFSET;
6035 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6036 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6037 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6038 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6039 			      IS_DMA_IDLE(dma_core_sts0);
6040 
6041 		if (!is_eng_idle) {
6042 			dev_err_ratelimited(hdev->dev,
6043 				"Can't read via DMA because it is BUSY\n");
6044 			rc = -EAGAIN;
6045 			goto out;
6046 		}
6047 	}
6048 
6049 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6050 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6051 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6052 
6053 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6054 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6055 	 * ASID
6056 	 */
6057 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6058 
6059 	/* Verify DMA is OK */
6060 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6061 	if (err_cause) {
6062 		dev_dbg(hdev->dev,
6063 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
6064 			err_cause);
6065 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6066 	}
6067 
6068 	pos = 0;
6069 	size_left = size;
6070 	size_to_dma = SZ_2M;
6071 
6072 	while (size_left > 0) {
6073 
6074 		if (size_left < SZ_2M)
6075 			size_to_dma = size_left;
6076 
6077 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6078 						dma_addr);
6079 		if (rc)
6080 			break;
6081 
6082 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6083 
6084 		if (size_left <= SZ_2M)
6085 			break;
6086 
6087 		pos += SZ_2M;
6088 		addr += SZ_2M;
6089 		size_left -= SZ_2M;
6090 	}
6091 
6092 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6093 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6094 	 * ASID
6095 	 */
6096 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6097 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6098 
6099 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6100 
6101 out:
6102 	hdev->asic_funcs->hw_queues_unlock(hdev);
6103 
6104 	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6105 
6106 	return rc;
6107 }
6108 
6109 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6110 {
6111 	struct gaudi_device *gaudi = hdev->asic_specific;
6112 
6113 	if (hdev->reset_info.hard_reset_pending)
6114 		return U64_MAX;
6115 
6116 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6117 			(addr - gaudi->hbm_bar_cur_addr));
6118 }
6119 
6120 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6121 {
6122 	struct gaudi_device *gaudi = hdev->asic_specific;
6123 
6124 	if (hdev->reset_info.hard_reset_pending)
6125 		return;
6126 
6127 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6128 			(addr - gaudi->hbm_bar_cur_addr));
6129 }
6130 
6131 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6132 {
6133 	/* mask to zero the MMBP and ASID bits */
6134 	WREG32_AND(reg, ~0x7FF);
6135 	WREG32_OR(reg, asid);
6136 }
6137 
6138 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6139 {
6140 	struct gaudi_device *gaudi = hdev->asic_specific;
6141 
6142 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6143 		return;
6144 
6145 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6146 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6147 		return;
6148 	}
6149 
6150 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6151 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6152 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6153 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6154 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6155 
6156 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6157 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6158 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6159 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6160 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6161 
6162 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6163 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6164 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6165 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6166 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6167 
6168 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6169 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6170 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6171 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6172 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6173 
6174 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6175 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6176 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6177 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6178 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6179 
6180 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6181 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6182 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6183 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6184 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6185 
6186 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6187 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6188 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6189 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6190 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6191 
6192 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6193 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6194 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6195 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6196 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6197 
6198 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6199 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6200 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6201 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6202 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6203 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6204 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6205 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6206 
6207 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6208 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6209 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6210 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6211 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6212 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6213 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6214 
6215 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6216 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6217 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6218 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6219 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6220 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6221 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6222 
6223 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6224 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6225 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6226 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6227 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6228 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6229 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6230 
6231 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6232 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6233 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6234 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6235 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6236 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6237 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6238 
6239 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6240 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6241 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6242 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6243 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6244 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6245 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6246 
6247 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6248 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6249 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6250 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6251 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6252 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6253 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6254 
6255 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6256 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6257 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6258 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6259 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6260 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6261 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6262 
6263 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6264 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6265 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6266 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6267 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6268 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6269 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6270 
6271 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6272 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6273 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6274 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6275 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6276 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6277 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6278 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6279 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6280 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6281 
6282 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6283 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6284 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6285 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6286 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6287 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6288 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6289 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6290 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6291 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6292 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6293 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6294 
6295 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6296 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6297 				asid);
6298 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6299 				asid);
6300 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6301 				asid);
6302 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6303 				asid);
6304 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6305 				asid);
6306 	}
6307 
6308 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6309 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6310 				asid);
6311 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6312 				asid);
6313 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6314 				asid);
6315 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6316 				asid);
6317 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6318 				asid);
6319 	}
6320 
6321 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6322 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6323 				asid);
6324 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6325 				asid);
6326 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6327 				asid);
6328 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6329 				asid);
6330 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6331 				asid);
6332 	}
6333 
6334 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6335 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6336 				asid);
6337 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6338 				asid);
6339 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6340 				asid);
6341 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6342 				asid);
6343 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6344 				asid);
6345 	}
6346 
6347 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6348 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6349 				asid);
6350 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6351 				asid);
6352 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6353 				asid);
6354 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6355 				asid);
6356 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6357 				asid);
6358 	}
6359 
6360 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6361 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6362 				asid);
6363 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6364 				asid);
6365 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6366 				asid);
6367 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6368 				asid);
6369 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6370 				asid);
6371 	}
6372 
6373 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6374 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6375 				asid);
6376 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6377 				asid);
6378 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6379 				asid);
6380 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6381 				asid);
6382 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6383 				asid);
6384 	}
6385 
6386 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6387 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6388 				asid);
6389 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6390 				asid);
6391 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6392 				asid);
6393 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6394 				asid);
6395 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6396 				asid);
6397 	}
6398 
6399 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6400 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6401 				asid);
6402 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6403 				asid);
6404 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6405 				asid);
6406 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6407 				asid);
6408 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6409 				asid);
6410 	}
6411 
6412 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6413 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6414 				asid);
6415 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6416 				asid);
6417 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6418 				asid);
6419 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6420 				asid);
6421 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6422 				asid);
6423 	}
6424 
6425 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6426 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6427 }
6428 
6429 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6430 		struct hl_cs_job *job)
6431 {
6432 	struct packet_msg_prot *fence_pkt;
6433 	u32 *fence_ptr;
6434 	dma_addr_t fence_dma_addr;
6435 	struct hl_cb *cb;
6436 	u32 tmp, timeout, dma_offset;
6437 	int rc;
6438 
6439 	if (hdev->pldm)
6440 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6441 	else
6442 		timeout = HL_DEVICE_TIMEOUT_USEC;
6443 
6444 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6445 	if (!fence_ptr) {
6446 		dev_err(hdev->dev,
6447 			"Failed to allocate fence memory for QMAN0\n");
6448 		return -ENOMEM;
6449 	}
6450 
6451 	cb = job->patched_cb;
6452 
6453 	fence_pkt = cb->kernel_address +
6454 			job->job_cb_size - sizeof(struct packet_msg_prot);
6455 
6456 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6457 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6458 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6459 
6460 	fence_pkt->ctl = cpu_to_le32(tmp);
6461 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6462 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6463 
6464 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6465 
6466 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6467 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6468 
6469 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6470 					job->job_cb_size, cb->bus_address);
6471 	if (rc) {
6472 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6473 		goto free_fence_ptr;
6474 	}
6475 
6476 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6477 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6478 				timeout, true);
6479 
6480 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6481 
6482 	if (rc == -ETIMEDOUT) {
6483 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6484 		goto free_fence_ptr;
6485 	}
6486 
6487 free_fence_ptr:
6488 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6489 
6490 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6491 	return rc;
6492 }
6493 
6494 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6495 {
6496 	if (event_type >= GAUDI_EVENT_SIZE)
6497 		goto event_not_supported;
6498 
6499 	if (!gaudi_irq_map_table[event_type].valid)
6500 		goto event_not_supported;
6501 
6502 	snprintf(desc, size, gaudi_irq_map_table[event_type].name);
6503 
6504 	return;
6505 
6506 event_not_supported:
6507 	snprintf(desc, size, "N/A");
6508 }
6509 
6510 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6511 							bool is_write, u16 *engine_id_1,
6512 							u16 *engine_id_2)
6513 {
6514 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6515 
6516 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6517 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6518 
6519 	switch (x_y) {
6520 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6521 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6522 		dma_id[0] = 0;
6523 		dma_id[1] = 2;
6524 		break;
6525 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6526 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6527 		dma_id[0] = 1;
6528 		dma_id[1] = 3;
6529 		break;
6530 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6531 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6532 		dma_id[0] = 4;
6533 		dma_id[1] = 6;
6534 		break;
6535 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6536 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6537 		dma_id[0] = 5;
6538 		dma_id[1] = 7;
6539 		break;
6540 	default:
6541 		goto unknown_initiator;
6542 	}
6543 
6544 	for (i = 0 ; i < 2 ; i++) {
6545 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6546 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6547 	}
6548 
6549 	switch (x_y) {
6550 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6551 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6552 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6553 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6554 			return "DMA0";
6555 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6556 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6557 			return "DMA2";
6558 		} else {
6559 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6560 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6561 			return "DMA0 or DMA2";
6562 		}
6563 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6564 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6565 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6566 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6567 			return "DMA1";
6568 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6569 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6570 			return "DMA3";
6571 		} else {
6572 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6573 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6574 			return "DMA1 or DMA3";
6575 		}
6576 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6577 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6578 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6579 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6580 			return "DMA4";
6581 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6582 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6583 			return "DMA6";
6584 		} else {
6585 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6586 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6587 			return "DMA4 or DMA6";
6588 		}
6589 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6590 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6591 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6592 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6593 			return "DMA5";
6594 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6595 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6596 			return "DMA7";
6597 		} else {
6598 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6599 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6600 			return "DMA5 or DMA7";
6601 		}
6602 	}
6603 
6604 unknown_initiator:
6605 	return "unknown initiator";
6606 }
6607 
6608 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6609 							u16 *engine_id_1, u16 *engine_id_2)
6610 {
6611 	u32 val, x_y, axi_id;
6612 
6613 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6614 				RREG32(mmMMU_UP_RAZWI_READ_ID);
6615 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6616 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6617 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6618 			RAZWI_INITIATOR_AXI_ID_SHIFT);
6619 
6620 	switch (x_y) {
6621 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6622 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6623 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6624 			return "TPC0";
6625 		}
6626 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6627 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6628 			return "NIC0";
6629 		}
6630 		break;
6631 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6632 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6633 		return "TPC1";
6634 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6635 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6636 		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6637 		return "MME0";
6638 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6639 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6640 		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6641 		return "MME1";
6642 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6643 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6644 		return "TPC2";
6645 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6646 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6647 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6648 			return "TPC3";
6649 		}
6650 		/* PCI, CPU or PSOC does not have engine id*/
6651 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6652 			return "PCI";
6653 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6654 			return "CPU";
6655 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6656 			return "PSOC";
6657 		break;
6658 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6659 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6660 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6661 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6662 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6663 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6664 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6665 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6666 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6667 				engine_id_1, engine_id_2);
6668 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6669 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6670 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6671 			return "TPC4";
6672 		}
6673 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6674 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6675 			return "NIC1";
6676 		}
6677 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6678 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6679 			return "NIC2";
6680 		}
6681 		break;
6682 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6683 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6684 		return "TPC5";
6685 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6686 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6687 		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6688 		return "MME2";
6689 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6690 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6691 		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6692 		return "MME3";
6693 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6694 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6695 		return "TPC6";
6696 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6697 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6698 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6699 			return "TPC7";
6700 		}
6701 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6702 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6703 			return "NIC4";
6704 		}
6705 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6706 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6707 			return "NIC5";
6708 		}
6709 		break;
6710 	default:
6711 		break;
6712 	}
6713 
6714 	dev_err(hdev->dev,
6715 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6716 		val,
6717 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6718 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6719 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6720 			RAZWI_INITIATOR_AXI_ID_MASK);
6721 
6722 	return "unknown initiator";
6723 }
6724 
6725 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6726 						u16 *engine_id_2, bool *is_read, bool *is_write)
6727 {
6728 
6729 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6730 		dev_err_ratelimited(hdev->dev,
6731 			"RAZWI event caused by illegal write of %s\n",
6732 			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6733 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6734 		*is_write = true;
6735 	}
6736 
6737 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6738 		dev_err_ratelimited(hdev->dev,
6739 			"RAZWI event caused by illegal read of %s\n",
6740 			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6741 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6742 		*is_read = true;
6743 	}
6744 }
6745 
6746 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6747 {
6748 	struct gaudi_device *gaudi = hdev->asic_specific;
6749 	u32 val;
6750 
6751 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6752 		return;
6753 
6754 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6755 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6756 		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6757 		*addr <<= 32;
6758 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6759 
6760 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6761 		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6762 
6763 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6764 	}
6765 
6766 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6767 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6768 		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6769 		*addr <<= 32;
6770 		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6771 
6772 		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6773 
6774 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6775 	}
6776 }
6777 
6778 /*
6779  *  +-------------------+------------------------------------------------------+
6780  *  | Configuration Reg |                     Description                      |
6781  *  |      Address      |                                                      |
6782  *  +-------------------+------------------------------------------------------+
6783  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6784  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6785  *  |                   |0xF34 memory wrappers 63:32                           |
6786  *  |                   |0xF38 memory wrappers 95:64                           |
6787  *  |                   |0xF3C memory wrappers 127:96                          |
6788  *  +-------------------+------------------------------------------------------+
6789  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6790  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6791  *  |                   |0xF44 memory wrappers 63:32                           |
6792  *  |                   |0xF48 memory wrappers 95:64                           |
6793  *  |                   |0xF4C memory wrappers 127:96                          |
6794  *  +-------------------+------------------------------------------------------+
6795  */
6796 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6797 		struct ecc_info_extract_params *params, u64 *ecc_address,
6798 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6799 {
6800 	u32 i, num_mem_regs, reg, err_bit;
6801 	u64 err_addr, err_word = 0;
6802 
6803 	num_mem_regs = params->num_memories / 32 +
6804 			((params->num_memories % 32) ? 1 : 0);
6805 
6806 	if (params->block_address >= CFG_BASE)
6807 		params->block_address -= CFG_BASE;
6808 
6809 	if (params->derr)
6810 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6811 	else
6812 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6813 
6814 	/* Set invalid wrapper index */
6815 	*memory_wrapper_idx = 0xFF;
6816 
6817 	/* Iterate through memory wrappers, a single bit must be set */
6818 	for (i = 0 ; i < num_mem_regs ; i++) {
6819 		err_addr += i * 4;
6820 		err_word = RREG32(err_addr);
6821 		if (err_word) {
6822 			err_bit = __ffs(err_word);
6823 			*memory_wrapper_idx = err_bit + (32 * i);
6824 			break;
6825 		}
6826 	}
6827 
6828 	if (*memory_wrapper_idx == 0xFF) {
6829 		dev_err(hdev->dev, "ECC error information cannot be found\n");
6830 		return -EINVAL;
6831 	}
6832 
6833 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6834 			*memory_wrapper_idx);
6835 
6836 	*ecc_address =
6837 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6838 	*ecc_syndrom =
6839 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6840 
6841 	/* Clear error indication */
6842 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6843 	if (params->derr)
6844 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6845 	else
6846 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6847 
6848 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6849 
6850 	return 0;
6851 }
6852 
6853 /*
6854  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6855  *
6856  * @idx: the current pi/ci value
6857  * @q_len: the queue length (power of 2)
6858  *
6859  * @return the cyclically decremented index
6860  */
6861 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6862 {
6863 	u32 mask = q_len - 1;
6864 
6865 	/*
6866 	 * modular decrement is equivalent to adding (queue_size -1)
6867 	 * later we take LSBs to make sure the value is in the
6868 	 * range [0, queue_len - 1]
6869 	 */
6870 	return (idx + q_len - 1) & mask;
6871 }
6872 
6873 /**
6874  * gaudi_handle_sw_config_stream_data - print SW config stream data
6875  *
6876  * @hdev: pointer to the habanalabs device structure
6877  * @stream: the QMAN's stream
6878  * @qman_base: base address of QMAN registers block
6879  * @event_mask: mask of the last events occurred
6880  */
6881 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6882 						u64 qman_base, u64 event_mask)
6883 {
6884 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6885 	u32 cq_ptr_lo_off, size;
6886 
6887 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6888 
6889 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6890 						stream * cq_ptr_lo_off;
6891 	cq_ptr_hi = cq_ptr_lo +
6892 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6893 	cq_tsize = cq_ptr_lo +
6894 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6895 
6896 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6897 	size = RREG32(cq_tsize);
6898 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6899 							stream, cq_ptr, size);
6900 
6901 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6902 		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6903 		hdev->captured_err_info.undef_opcode.cq_size = size;
6904 		hdev->captured_err_info.undef_opcode.stream_id = stream;
6905 	}
6906 }
6907 
6908 /**
6909  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6910  *
6911  * @hdev: pointer to the habanalabs device structure
6912  * @qid_base: first QID of the QMAN (out of 4 streams)
6913  * @stream: the QMAN's stream
6914  * @qman_base: base address of QMAN registers block
6915  * @event_mask: mask of the last events occurred
6916  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6917  */
6918 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6919 						u32 stream, u64 qman_base,
6920 						u64 event_mask,
6921 						bool pr_sw_conf)
6922 {
6923 	u32 ci, qm_ci_stream_off, queue_len;
6924 	struct hl_hw_queue *q;
6925 	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6926 	int i;
6927 
6928 	q = &hdev->kernel_queues[qid_base + stream];
6929 
6930 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6931 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6932 						stream * qm_ci_stream_off;
6933 
6934 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6935 					q->int_queue_len : HL_QUEUE_LENGTH;
6936 
6937 	hdev->asic_funcs->hw_queues_lock(hdev);
6938 
6939 	if (pr_sw_conf)
6940 		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6941 
6942 	ci = RREG32(pq_ci);
6943 
6944 	/* we should start printing form ci -1 */
6945 	ci = gaudi_queue_idx_dec(ci, queue_len);
6946 	memset(addr, 0, sizeof(addr));
6947 
6948 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6949 		struct hl_bd *bd;
6950 		u32 len;
6951 
6952 		bd = q->kernel_address;
6953 		bd += ci;
6954 
6955 		len = le32_to_cpu(bd->len);
6956 		/* len 0 means uninitialized entry- break */
6957 		if (!len)
6958 			break;
6959 
6960 		addr[i] = le64_to_cpu(bd->ptr);
6961 
6962 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6963 							stream, ci, addr[i], len);
6964 
6965 		/* get previous ci, wrap if needed */
6966 		ci = gaudi_queue_idx_dec(ci, queue_len);
6967 	}
6968 
6969 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6970 		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6971 		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6972 
6973 		if (arr_idx == 0) {
6974 			undef_opcode->timestamp = ktime_get();
6975 			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6976 		}
6977 
6978 		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6979 		undef_opcode->cb_addr_streams_len++;
6980 	}
6981 
6982 	hdev->asic_funcs->hw_queues_unlock(hdev);
6983 }
6984 
6985 /**
6986  * handle_qman_data_on_err - extract QMAN data on error
6987  *
6988  * @hdev: pointer to the habanalabs device structure
6989  * @qid_base: first QID of the QMAN (out of 4 streams)
6990  * @stream: the QMAN's stream
6991  * @qman_base: base address of QMAN registers block
6992  * @event_mask: mask of the last events occurred
6993  *
6994  * This function attempt to exatract as much data as possible on QMAN error.
6995  * On upper CP print the SW config stream data and last 8 PQEs.
6996  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6997  */
6998 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6999 				   u32 stream, u64 qman_base, u64 event_mask)
7000 {
7001 	u32 i;
7002 
7003 	if (stream != QMAN_STREAMS) {
7004 		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
7005 			qman_base, event_mask, true);
7006 		return;
7007 	}
7008 
7009 	/* handle Lower-CP */
7010 	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
7011 
7012 	for (i = 0; i < QMAN_STREAMS; i++)
7013 		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
7014 			qman_base, event_mask, false);
7015 }
7016 
7017 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7018 					  const char *qm_name,
7019 					  u64 qman_base,
7020 					  u32 qid_base,
7021 					  u64 *event_mask)
7022 {
7023 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7024 	u64 glbl_sts_addr, arb_err_addr;
7025 	char reg_desc[32];
7026 
7027 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7028 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7029 
7030 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
7031 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7032 		glbl_sts_clr_val = 0;
7033 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7034 
7035 		if (!glbl_sts_val)
7036 			continue;
7037 
7038 		if (i == QMAN_STREAMS)
7039 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7040 		else
7041 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7042 
7043 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7044 			if (glbl_sts_val & BIT(j)) {
7045 				dev_err_ratelimited(hdev->dev,
7046 						"%s %s. err cause: %s\n",
7047 						qm_name, reg_desc,
7048 						gaudi_qman_error_cause[j]);
7049 				glbl_sts_clr_val |= BIT(j);
7050 			}
7051 		}
7052 		/* check for undefined opcode */
7053 		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
7054 				hdev->captured_err_info.undef_opcode.write_enable) {
7055 			memset(&hdev->captured_err_info.undef_opcode, 0,
7056 						sizeof(hdev->captured_err_info.undef_opcode));
7057 
7058 			hdev->captured_err_info.undef_opcode.write_enable = false;
7059 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7060 		}
7061 
7062 		/* Write 1 clear errors */
7063 		if (!hdev->stop_on_err)
7064 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7065 		else
7066 			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
7067 	}
7068 
7069 	arb_err_val = RREG32(arb_err_addr);
7070 
7071 	if (!arb_err_val)
7072 		return;
7073 
7074 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7075 		if (arb_err_val & BIT(j)) {
7076 			dev_err_ratelimited(hdev->dev,
7077 					"%s ARB_ERR. err cause: %s\n",
7078 					qm_name,
7079 					gaudi_qman_arb_error_cause[j]);
7080 		}
7081 	}
7082 }
7083 
7084 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7085 		struct hl_eq_sm_sei_data *sei_data)
7086 {
7087 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7088 
7089 	/* Flip the bits as the enum is ordered in the opposite way */
7090 	index = (index ^ 0x3) & 0x3;
7091 
7092 	switch (sei_data->sei_cause) {
7093 	case SM_SEI_SO_OVERFLOW:
7094 		dev_err_ratelimited(hdev->dev,
7095 			"%s SEI Error: SOB Group %u overflow/underflow",
7096 			gaudi_sync_manager_names[index],
7097 			le32_to_cpu(sei_data->sei_log));
7098 		break;
7099 	case SM_SEI_LBW_4B_UNALIGNED:
7100 		dev_err_ratelimited(hdev->dev,
7101 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7102 			gaudi_sync_manager_names[index],
7103 			le32_to_cpu(sei_data->sei_log));
7104 		break;
7105 	case SM_SEI_AXI_RESPONSE_ERR:
7106 		dev_err_ratelimited(hdev->dev,
7107 			"%s SEI Error: AXI ID %u response error",
7108 			gaudi_sync_manager_names[index],
7109 			le32_to_cpu(sei_data->sei_log));
7110 		break;
7111 	default:
7112 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7113 				le32_to_cpu(sei_data->sei_log));
7114 		break;
7115 	}
7116 }
7117 
7118 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7119 		struct hl_eq_ecc_data *ecc_data)
7120 {
7121 	struct ecc_info_extract_params params;
7122 	u64 ecc_address = 0, ecc_syndrom = 0;
7123 	u8 index, memory_wrapper_idx = 0;
7124 	bool extract_info_from_fw;
7125 	int rc;
7126 
7127 	if (hdev->asic_prop.fw_security_enabled) {
7128 		extract_info_from_fw = true;
7129 		goto extract_ecc_info;
7130 	}
7131 
7132 	switch (event_type) {
7133 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7134 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7135 		extract_info_from_fw = true;
7136 		break;
7137 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7138 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7139 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7140 		params.num_memories = 90;
7141 		params.derr = false;
7142 		extract_info_from_fw = false;
7143 		break;
7144 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7145 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7146 		params.block_address =
7147 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7148 		params.num_memories = 90;
7149 		params.derr = true;
7150 		extract_info_from_fw = false;
7151 		break;
7152 	case GAUDI_EVENT_MME0_ACC_SERR:
7153 	case GAUDI_EVENT_MME1_ACC_SERR:
7154 	case GAUDI_EVENT_MME2_ACC_SERR:
7155 	case GAUDI_EVENT_MME3_ACC_SERR:
7156 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7157 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7158 		params.num_memories = 128;
7159 		params.derr = false;
7160 		extract_info_from_fw = false;
7161 		break;
7162 	case GAUDI_EVENT_MME0_ACC_DERR:
7163 	case GAUDI_EVENT_MME1_ACC_DERR:
7164 	case GAUDI_EVENT_MME2_ACC_DERR:
7165 	case GAUDI_EVENT_MME3_ACC_DERR:
7166 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7167 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7168 		params.num_memories = 128;
7169 		params.derr = true;
7170 		extract_info_from_fw = false;
7171 		break;
7172 	case GAUDI_EVENT_MME0_SBAB_SERR:
7173 	case GAUDI_EVENT_MME1_SBAB_SERR:
7174 	case GAUDI_EVENT_MME2_SBAB_SERR:
7175 	case GAUDI_EVENT_MME3_SBAB_SERR:
7176 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7177 		params.block_address =
7178 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7179 		params.num_memories = 33;
7180 		params.derr = false;
7181 		extract_info_from_fw = false;
7182 		break;
7183 	case GAUDI_EVENT_MME0_SBAB_DERR:
7184 	case GAUDI_EVENT_MME1_SBAB_DERR:
7185 	case GAUDI_EVENT_MME2_SBAB_DERR:
7186 	case GAUDI_EVENT_MME3_SBAB_DERR:
7187 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7188 		params.block_address =
7189 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7190 		params.num_memories = 33;
7191 		params.derr = true;
7192 		extract_info_from_fw = false;
7193 		break;
7194 	default:
7195 		return;
7196 	}
7197 
7198 extract_ecc_info:
7199 	if (extract_info_from_fw) {
7200 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7201 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7202 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7203 	} else {
7204 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7205 				&ecc_syndrom, &memory_wrapper_idx);
7206 		if (rc)
7207 			return;
7208 	}
7209 
7210 	dev_err(hdev->dev,
7211 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7212 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7213 }
7214 
7215 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7216 {
7217 	u64 qman_base;
7218 	char desc[32];
7219 	u32 qid_base;
7220 	u8 index;
7221 
7222 	switch (event_type) {
7223 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7224 		index = event_type - GAUDI_EVENT_TPC0_QM;
7225 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7226 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7227 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7228 		break;
7229 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7230 		if (event_type == GAUDI_EVENT_MME0_QM) {
7231 			index = 0;
7232 			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7233 		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7234 			index = 2;
7235 			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7236 		}
7237 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7238 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7239 		break;
7240 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7241 		index = event_type - GAUDI_EVENT_DMA0_QM;
7242 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7243 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7244 		if (index > 1)
7245 			qid_base++;
7246 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7247 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7248 		break;
7249 	case GAUDI_EVENT_NIC0_QM0:
7250 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7251 		qman_base = mmNIC0_QM0_BASE;
7252 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7253 		break;
7254 	case GAUDI_EVENT_NIC0_QM1:
7255 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7256 		qman_base = mmNIC0_QM1_BASE;
7257 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7258 		break;
7259 	case GAUDI_EVENT_NIC1_QM0:
7260 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7261 		qman_base = mmNIC1_QM0_BASE;
7262 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7263 		break;
7264 	case GAUDI_EVENT_NIC1_QM1:
7265 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7266 		qman_base = mmNIC1_QM1_BASE;
7267 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7268 		break;
7269 	case GAUDI_EVENT_NIC2_QM0:
7270 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7271 		qman_base = mmNIC2_QM0_BASE;
7272 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7273 		break;
7274 	case GAUDI_EVENT_NIC2_QM1:
7275 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7276 		qman_base = mmNIC2_QM1_BASE;
7277 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7278 		break;
7279 	case GAUDI_EVENT_NIC3_QM0:
7280 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7281 		qman_base = mmNIC3_QM0_BASE;
7282 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7283 		break;
7284 	case GAUDI_EVENT_NIC3_QM1:
7285 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7286 		qman_base = mmNIC3_QM1_BASE;
7287 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7288 		break;
7289 	case GAUDI_EVENT_NIC4_QM0:
7290 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7291 		qman_base = mmNIC4_QM0_BASE;
7292 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7293 		break;
7294 	case GAUDI_EVENT_NIC4_QM1:
7295 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7296 		qman_base = mmNIC4_QM1_BASE;
7297 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7298 		break;
7299 	default:
7300 		return;
7301 	}
7302 
7303 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7304 }
7305 
7306 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7307 					bool check_razwi, u64 *event_mask)
7308 {
7309 	bool is_read = false, is_write = false;
7310 	u16 engine_id[2], num_of_razwi_eng = 0;
7311 	char desc[64] = "";
7312 	u64 razwi_addr = 0;
7313 	u8 razwi_flags = 0;
7314 
7315 	/*
7316 	 * Init engine id by default as not valid and only if razwi initiated from engine with
7317 	 * engine id it will get valid value.
7318 	 */
7319 	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7320 	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7321 
7322 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7323 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7324 		event_type, desc);
7325 
7326 	if (check_razwi) {
7327 		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7328 						&is_write);
7329 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7330 
7331 		if (is_read)
7332 			razwi_flags |= HL_RAZWI_READ;
7333 		if (is_write)
7334 			razwi_flags |= HL_RAZWI_WRITE;
7335 
7336 		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7337 			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7338 				num_of_razwi_eng = 2;
7339 			else
7340 				num_of_razwi_eng = 1;
7341 		}
7342 
7343 		if (razwi_flags)
7344 			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7345 					razwi_flags, event_mask);
7346 	}
7347 }
7348 
7349 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7350 					struct cpucp_pkt_sync_err *sync_err)
7351 {
7352 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7353 
7354 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7355 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7356 }
7357 
7358 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7359 					struct hl_eq_fw_alive *fw_alive)
7360 {
7361 	dev_err(hdev->dev,
7362 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7363 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7364 		le32_to_cpu(fw_alive->process_id),
7365 		le32_to_cpu(fw_alive->thread_id),
7366 		le64_to_cpu(fw_alive->uptime_seconds));
7367 }
7368 
7369 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7370 						void *data)
7371 {
7372 	char desc[64] = "", *type;
7373 	struct eq_nic_sei_event *eq_nic_sei = data;
7374 	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7375 
7376 	switch (eq_nic_sei->axi_error_cause) {
7377 	case RXB:
7378 		type = "RXB";
7379 		break;
7380 	case RXE:
7381 		type = "RXE";
7382 		break;
7383 	case TXS:
7384 		type = "TXS";
7385 		break;
7386 	case TXE:
7387 		type = "TXE";
7388 		break;
7389 	case QPC_RESP:
7390 		type = "QPC_RESP";
7391 		break;
7392 	case NON_AXI_ERR:
7393 		type = "NON_AXI_ERR";
7394 		break;
7395 	case TMR:
7396 		type = "TMR";
7397 		break;
7398 	default:
7399 		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7400 			eq_nic_sei->axi_error_cause);
7401 		type = "N/A";
7402 		break;
7403 	}
7404 
7405 	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7406 			eq_nic_sei->id);
7407 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7408 		event_type, desc);
7409 }
7410 
7411 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7412 {
7413 	/* GAUDI doesn't support any reset except hard-reset */
7414 	return -EPERM;
7415 }
7416 
7417 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7418 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7419 {
7420 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7421 	int rc = 0;
7422 
7423 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7424 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7425 		if (!hbm_ecc_data) {
7426 			dev_err(hdev->dev, "No FW ECC data");
7427 			return 0;
7428 		}
7429 
7430 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7431 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7432 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7433 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7434 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7435 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7436 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7437 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7438 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7439 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7440 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7441 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7442 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7443 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7444 
7445 		dev_err(hdev->dev,
7446 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7447 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7448 		dev_err(hdev->dev,
7449 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7450 			device, ch, hbm_ecc_data->first_addr, type,
7451 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7452 			hbm_ecc_data->dec_cnt);
7453 		return 0;
7454 	}
7455 
7456 	if (hdev->asic_prop.fw_security_enabled) {
7457 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7458 		return 0;
7459 	}
7460 
7461 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7462 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7463 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7464 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7465 		if (val) {
7466 			rc = -EIO;
7467 			dev_err(hdev->dev,
7468 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7469 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7470 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7471 				(val >> 4) & 0x1);
7472 
7473 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7474 			dev_err(hdev->dev,
7475 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7476 				device, ch * 2,
7477 				RREG32(base + ch * 0x1000 + 0x064),
7478 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7479 				(val2 & 0xFF0000) >> 16,
7480 				(val2 & 0xFF000000) >> 24);
7481 		}
7482 
7483 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7484 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7485 		if (val) {
7486 			rc = -EIO;
7487 			dev_err(hdev->dev,
7488 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7489 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7490 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7491 				(val >> 4) & 0x1);
7492 
7493 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7494 			dev_err(hdev->dev,
7495 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7496 				device, ch * 2 + 1,
7497 				RREG32(base + ch * 0x1000 + 0x074),
7498 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7499 				(val2 & 0xFF0000) >> 16,
7500 				(val2 & 0xFF000000) >> 24);
7501 		}
7502 
7503 		/* Clear interrupts */
7504 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7505 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7506 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7507 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7508 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7509 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7510 	}
7511 
7512 	val  = RREG32(base + 0x8F30);
7513 	val2 = RREG32(base + 0x8F34);
7514 	if (val | val2) {
7515 		rc = -EIO;
7516 		dev_err(hdev->dev,
7517 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7518 			device, val, val2);
7519 	}
7520 	val  = RREG32(base + 0x8F40);
7521 	val2 = RREG32(base + 0x8F44);
7522 	if (val | val2) {
7523 		rc = -EIO;
7524 		dev_err(hdev->dev,
7525 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7526 			device, val, val2);
7527 	}
7528 
7529 	return rc;
7530 }
7531 
7532 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7533 {
7534 	switch (hbm_event_type) {
7535 	case GAUDI_EVENT_HBM0_SPI_0:
7536 	case GAUDI_EVENT_HBM0_SPI_1:
7537 		return 0;
7538 	case GAUDI_EVENT_HBM1_SPI_0:
7539 	case GAUDI_EVENT_HBM1_SPI_1:
7540 		return 1;
7541 	case GAUDI_EVENT_HBM2_SPI_0:
7542 	case GAUDI_EVENT_HBM2_SPI_1:
7543 		return 2;
7544 	case GAUDI_EVENT_HBM3_SPI_0:
7545 	case GAUDI_EVENT_HBM3_SPI_1:
7546 		return 3;
7547 	default:
7548 		break;
7549 	}
7550 
7551 	/* Should never happen */
7552 	return 0;
7553 }
7554 
7555 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7556 					char *interrupt_name)
7557 {
7558 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7559 	bool soft_reset_required = false;
7560 
7561 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7562 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7563 
7564 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7565 		if (tpc_interrupts_cause & BIT(i)) {
7566 			dev_err_ratelimited(hdev->dev,
7567 					"TPC%d_%s interrupt cause: %s\n",
7568 					tpc_id, interrupt_name,
7569 					gaudi_tpc_interrupts_cause[i]);
7570 			/* If this is QM error, we need to soft-reset */
7571 			if (i == 15)
7572 				soft_reset_required = true;
7573 		}
7574 
7575 	/* Clear interrupts */
7576 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7577 
7578 	return soft_reset_required;
7579 }
7580 
7581 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7582 {
7583 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7584 }
7585 
7586 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7587 {
7588 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7589 }
7590 
7591 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7592 {
7593 	ktime_t zero_time = ktime_set(0, 0);
7594 
7595 	mutex_lock(&hdev->clk_throttling.lock);
7596 
7597 	switch (event_type) {
7598 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7599 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7600 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7601 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7602 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7603 		dev_info_ratelimited(hdev->dev,
7604 			"Clock throttling due to power consumption\n");
7605 		break;
7606 
7607 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7608 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7609 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7610 		dev_info_ratelimited(hdev->dev,
7611 			"Power envelop is safe, back to optimal clock\n");
7612 		break;
7613 
7614 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7615 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7616 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7617 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7618 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7619 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7620 		dev_info_ratelimited(hdev->dev,
7621 			"Clock throttling due to overheating\n");
7622 		break;
7623 
7624 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7625 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7626 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7627 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7628 		dev_info_ratelimited(hdev->dev,
7629 			"Thermal envelop is safe, back to optimal clock\n");
7630 		break;
7631 
7632 	default:
7633 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7634 			event_type);
7635 		break;
7636 	}
7637 
7638 	mutex_unlock(&hdev->clk_throttling.lock);
7639 }
7640 
7641 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7642 {
7643 	struct gaudi_device *gaudi = hdev->asic_specific;
7644 	struct hl_info_fw_err_info fw_err_info;
7645 	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7646 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7647 	u32 fw_fatal_err_flag = 0, flags = 0;
7648 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7649 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7650 	bool reset_required, reset_direct = false;
7651 	u8 cause;
7652 	int rc;
7653 
7654 	if (event_type >= GAUDI_EVENT_SIZE) {
7655 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7656 				event_type, GAUDI_EVENT_SIZE - 1);
7657 		return;
7658 	}
7659 
7660 	gaudi->events_stat[event_type]++;
7661 	gaudi->events_stat_aggregate[event_type]++;
7662 
7663 	switch (event_type) {
7664 	case GAUDI_EVENT_PCIE_CORE_DERR:
7665 	case GAUDI_EVENT_PCIE_IF_DERR:
7666 	case GAUDI_EVENT_PCIE_PHY_DERR:
7667 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7668 	case GAUDI_EVENT_MME0_ACC_DERR:
7669 	case GAUDI_EVENT_MME0_SBAB_DERR:
7670 	case GAUDI_EVENT_MME1_ACC_DERR:
7671 	case GAUDI_EVENT_MME1_SBAB_DERR:
7672 	case GAUDI_EVENT_MME2_ACC_DERR:
7673 	case GAUDI_EVENT_MME2_SBAB_DERR:
7674 	case GAUDI_EVENT_MME3_ACC_DERR:
7675 	case GAUDI_EVENT_MME3_SBAB_DERR:
7676 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7677 		fallthrough;
7678 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7679 	case GAUDI_EVENT_PSOC_MEM_DERR:
7680 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7681 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7682 	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7683 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7684 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7685 	case GAUDI_EVENT_MMU_DERR:
7686 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7687 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7688 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7689 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7690 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7691 		goto reset_device;
7692 
7693 	case GAUDI_EVENT_GIC500:
7694 	case GAUDI_EVENT_AXI_ECC:
7695 	case GAUDI_EVENT_L2_RAM_ECC:
7696 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7697 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7698 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7699 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7700 		goto reset_device;
7701 
7702 	case GAUDI_EVENT_HBM0_SPI_0:
7703 	case GAUDI_EVENT_HBM1_SPI_0:
7704 	case GAUDI_EVENT_HBM2_SPI_0:
7705 	case GAUDI_EVENT_HBM3_SPI_0:
7706 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7707 		gaudi_hbm_read_interrupts(hdev,
7708 				gaudi_hbm_event_to_dev(event_type),
7709 				&eq_entry->hbm_ecc_data);
7710 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7711 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7712 		goto reset_device;
7713 
7714 	case GAUDI_EVENT_HBM0_SPI_1:
7715 	case GAUDI_EVENT_HBM1_SPI_1:
7716 	case GAUDI_EVENT_HBM2_SPI_1:
7717 	case GAUDI_EVENT_HBM3_SPI_1:
7718 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7719 		gaudi_hbm_read_interrupts(hdev,
7720 				gaudi_hbm_event_to_dev(event_type),
7721 				&eq_entry->hbm_ecc_data);
7722 		hl_fw_unmask_irq(hdev, event_type);
7723 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7724 		break;
7725 
7726 	case GAUDI_EVENT_TPC0_DEC:
7727 	case GAUDI_EVENT_TPC1_DEC:
7728 	case GAUDI_EVENT_TPC2_DEC:
7729 	case GAUDI_EVENT_TPC3_DEC:
7730 	case GAUDI_EVENT_TPC4_DEC:
7731 	case GAUDI_EVENT_TPC5_DEC:
7732 	case GAUDI_EVENT_TPC6_DEC:
7733 	case GAUDI_EVENT_TPC7_DEC:
7734 		/* In TPC DEC event, notify on TPC assertion. While there isn't
7735 		 * a specific event for assertion yet, the FW generates TPC DEC event.
7736 		 * The SW upper layer will inspect an internal mapped area to indicate
7737 		 * if the event is a TPC Assertion or a "real" TPC DEC.
7738 		 */
7739 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7740 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7741 		reset_required = gaudi_tpc_read_interrupts(hdev,
7742 					tpc_dec_event_to_tpc_id(event_type),
7743 					"AXI_SLV_DEC_Error");
7744 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7745 		if (reset_required) {
7746 			dev_err(hdev->dev, "reset required due to %s\n",
7747 				gaudi_irq_map_table[event_type].name);
7748 
7749 			reset_direct = true;
7750 			goto reset_device;
7751 		} else {
7752 			hl_fw_unmask_irq(hdev, event_type);
7753 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7754 		}
7755 		break;
7756 
7757 	case GAUDI_EVENT_TPC0_KRN_ERR:
7758 	case GAUDI_EVENT_TPC1_KRN_ERR:
7759 	case GAUDI_EVENT_TPC2_KRN_ERR:
7760 	case GAUDI_EVENT_TPC3_KRN_ERR:
7761 	case GAUDI_EVENT_TPC4_KRN_ERR:
7762 	case GAUDI_EVENT_TPC5_KRN_ERR:
7763 	case GAUDI_EVENT_TPC6_KRN_ERR:
7764 	case GAUDI_EVENT_TPC7_KRN_ERR:
7765 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7766 		reset_required = gaudi_tpc_read_interrupts(hdev,
7767 					tpc_krn_event_to_tpc_id(event_type),
7768 					"KRN_ERR");
7769 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7770 		if (reset_required) {
7771 			dev_err(hdev->dev, "reset required due to %s\n",
7772 				gaudi_irq_map_table[event_type].name);
7773 
7774 			reset_direct = true;
7775 			goto reset_device;
7776 		} else {
7777 			hl_fw_unmask_irq(hdev, event_type);
7778 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7779 		}
7780 		break;
7781 
7782 	case GAUDI_EVENT_PCIE_CORE_SERR:
7783 	case GAUDI_EVENT_PCIE_IF_SERR:
7784 	case GAUDI_EVENT_PCIE_PHY_SERR:
7785 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7786 	case GAUDI_EVENT_MME0_ACC_SERR:
7787 	case GAUDI_EVENT_MME0_SBAB_SERR:
7788 	case GAUDI_EVENT_MME1_ACC_SERR:
7789 	case GAUDI_EVENT_MME1_SBAB_SERR:
7790 	case GAUDI_EVENT_MME2_ACC_SERR:
7791 	case GAUDI_EVENT_MME2_SBAB_SERR:
7792 	case GAUDI_EVENT_MME3_ACC_SERR:
7793 	case GAUDI_EVENT_MME3_SBAB_SERR:
7794 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7795 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7796 	case GAUDI_EVENT_PSOC_MEM_SERR:
7797 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7798 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7799 	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7800 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7801 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7802 		fallthrough;
7803 	case GAUDI_EVENT_MMU_SERR:
7804 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7805 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7806 		hl_fw_unmask_irq(hdev, event_type);
7807 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7808 		break;
7809 
7810 	case GAUDI_EVENT_PCIE_DEC:
7811 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7812 	case GAUDI_EVENT_PSOC_AXI_DEC:
7813 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7814 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7815 		hl_fw_unmask_irq(hdev, event_type);
7816 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7817 		break;
7818 
7819 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7820 	case GAUDI_EVENT_MMU_WR_PERM:
7821 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7822 		hl_fw_unmask_irq(hdev, event_type);
7823 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7824 		break;
7825 
7826 	case GAUDI_EVENT_MME0_WBC_RSP:
7827 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7828 	case GAUDI_EVENT_MME1_WBC_RSP:
7829 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7830 	case GAUDI_EVENT_MME2_WBC_RSP:
7831 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7832 	case GAUDI_EVENT_MME3_WBC_RSP:
7833 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7834 	case GAUDI_EVENT_RAZWI_OR_ADC:
7835 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7836 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7837 		fallthrough;
7838 	case GAUDI_EVENT_NIC0_QM0:
7839 	case GAUDI_EVENT_NIC0_QM1:
7840 	case GAUDI_EVENT_NIC1_QM0:
7841 	case GAUDI_EVENT_NIC1_QM1:
7842 	case GAUDI_EVENT_NIC2_QM0:
7843 	case GAUDI_EVENT_NIC2_QM1:
7844 	case GAUDI_EVENT_NIC3_QM0:
7845 	case GAUDI_EVENT_NIC3_QM1:
7846 	case GAUDI_EVENT_NIC4_QM0:
7847 	case GAUDI_EVENT_NIC4_QM1:
7848 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7849 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7850 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7851 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7852 		hl_fw_unmask_irq(hdev, event_type);
7853 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7854 		break;
7855 
7856 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7857 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7858 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7859 		goto reset_device;
7860 
7861 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7862 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7863 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7864 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7865 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7866 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7867 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7868 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7869 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7870 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7871 		hl_fw_unmask_irq(hdev, event_type);
7872 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7873 		break;
7874 
7875 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7876 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7877 		hl_fw_unmask_irq(hdev, event_type);
7878 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7879 		break;
7880 
7881 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7882 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7883 		gaudi_print_sm_sei_info(hdev, event_type,
7884 					&eq_entry->sm_sei_data);
7885 		rc = hl_state_dump(hdev);
7886 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7887 		if (rc)
7888 			dev_err(hdev->dev,
7889 				"Error during system state dump %d\n", rc);
7890 		hl_fw_unmask_irq(hdev, event_type);
7891 		break;
7892 
7893 	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7894 		break;
7895 
7896 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7897 		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7898 		hl_fw_unmask_irq(hdev, event_type);
7899 		break;
7900 
7901 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7902 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7903 		dev_err(hdev->dev,
7904 			"Received high temp H/W interrupt %d (cause %d)\n",
7905 			event_type, cause);
7906 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7907 		break;
7908 
7909 	case GAUDI_EVENT_DEV_RESET_REQ:
7910 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7911 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7912 		goto reset_device;
7913 
7914 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7915 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7916 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7917 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7918 		goto reset_device;
7919 
7920 	case GAUDI_EVENT_FW_ALIVE_S:
7921 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7922 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7923 		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7924 		fw_err_info.event_id = event_type;
7925 		fw_err_info.event_mask = &event_mask;
7926 		hl_handle_fw_err(hdev, &fw_err_info);
7927 		goto reset_device;
7928 
7929 	default:
7930 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7931 				event_type);
7932 		break;
7933 	}
7934 
7935 	if (event_mask)
7936 		hl_notifier_event_send_all(hdev, event_mask);
7937 
7938 	return;
7939 
7940 reset_device:
7941 	reset_required = true;
7942 
7943 	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7944 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7945 
7946 		/* notify on device unavailable while the reset triggered by fw */
7947 		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7948 					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7949 	} else if (hdev->hard_reset_on_fw_events) {
7950 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7951 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7952 	} else {
7953 		reset_required = false;
7954 	}
7955 
7956 	if (reset_required) {
7957 		/* escalate general hw errors to critical/fatal error */
7958 		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7959 			hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7960 
7961 		hl_device_cond_reset(hdev, flags, event_mask);
7962 	} else {
7963 		hl_fw_unmask_irq(hdev, event_type);
7964 		/* Notification on occurred event needs to be sent although reset is not executed */
7965 		if (event_mask)
7966 			hl_notifier_event_send_all(hdev, event_mask);
7967 	}
7968 }
7969 
7970 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7971 {
7972 	struct gaudi_device *gaudi = hdev->asic_specific;
7973 
7974 	if (aggregate) {
7975 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7976 		return gaudi->events_stat_aggregate;
7977 	}
7978 
7979 	*size = (u32) sizeof(gaudi->events_stat);
7980 	return gaudi->events_stat;
7981 }
7982 
7983 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7984 {
7985 	struct gaudi_device *gaudi = hdev->asic_specific;
7986 	u32 status, timeout_usec;
7987 	int rc;
7988 
7989 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7990 		hdev->reset_info.hard_reset_pending)
7991 		return 0;
7992 
7993 	if (hdev->pldm)
7994 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7995 	else
7996 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7997 
7998 	/* L0 & L1 invalidation */
7999 	WREG32(mmSTLB_INV_PS, 3);
8000 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8001 	WREG32(mmSTLB_INV_PS, 2);
8002 
8003 	rc = hl_poll_timeout(
8004 		hdev,
8005 		mmSTLB_INV_PS,
8006 		status,
8007 		!status,
8008 		1000,
8009 		timeout_usec);
8010 
8011 	WREG32(mmSTLB_INV_SET, 0);
8012 
8013 	return rc;
8014 }
8015 
8016 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8017 						bool is_hard, u32 flags,
8018 						u32 asid, u64 va, u64 size)
8019 {
8020 	/* Treat as invalidate all because there is no range invalidation
8021 	 * in Gaudi
8022 	 */
8023 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8024 }
8025 
8026 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
8027 {
8028 	u32 status, timeout_usec;
8029 	int rc;
8030 
8031 	if (hdev->pldm)
8032 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8033 	else
8034 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8035 
8036 	WREG32(MMU_ASID, asid);
8037 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8038 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8039 	WREG32(MMU_BUSY, 0x80000000);
8040 
8041 	rc = hl_poll_timeout(
8042 		hdev,
8043 		MMU_BUSY,
8044 		status,
8045 		!(status & 0x80000000),
8046 		1000,
8047 		timeout_usec);
8048 
8049 	if (rc) {
8050 		dev_err(hdev->dev,
8051 			"Timeout during MMU hop0 config of asid %d\n", asid);
8052 		return rc;
8053 	}
8054 
8055 	return 0;
8056 }
8057 
8058 static int gaudi_send_heartbeat(struct hl_device *hdev)
8059 {
8060 	struct gaudi_device *gaudi = hdev->asic_specific;
8061 
8062 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8063 		return 0;
8064 
8065 	return hl_fw_send_heartbeat(hdev);
8066 }
8067 
8068 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8069 {
8070 	struct gaudi_device *gaudi = hdev->asic_specific;
8071 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8072 	int rc;
8073 
8074 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8075 		return 0;
8076 
8077 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8078 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8079 					mmCPU_BOOT_ERR1);
8080 	if (rc)
8081 		return rc;
8082 
8083 	if (!strlen(prop->cpucp_info.card_name))
8084 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8085 				CARD_NAME_MAX_LEN);
8086 
8087 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8088 
8089 	set_default_power_values(hdev);
8090 
8091 	return 0;
8092 }
8093 
8094 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8095 		struct engines_data *e)
8096 {
8097 	struct gaudi_device *gaudi = hdev->asic_specific;
8098 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8099 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8100 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8101 	unsigned long *mask = (unsigned long *)mask_arr;
8102 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8103 	bool is_idle = true, is_eng_idle, is_slave;
8104 	u64 offset;
8105 	int i, dma_id, port;
8106 
8107 	if (e)
8108 		hl_engine_data_sprintf(e,
8109 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8110 			"---  -------  ------------  ----------  -------------\n");
8111 
8112 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8113 		dma_id = gaudi_dma_assignment[i];
8114 		offset = dma_id * DMA_QMAN_OFFSET;
8115 
8116 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8117 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8118 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8119 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8120 				IS_DMA_IDLE(dma_core_sts0);
8121 		is_idle &= is_eng_idle;
8122 
8123 		if (mask && !is_eng_idle)
8124 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8125 		if (e)
8126 			hl_engine_data_sprintf(e, fmt, dma_id,
8127 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8128 				qm_cgm_sts, dma_core_sts0);
8129 	}
8130 
8131 	if (e)
8132 		hl_engine_data_sprintf(e,
8133 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8134 			"---  -------  ------------  ----------  ----------\n");
8135 
8136 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8137 		offset = i * TPC_QMAN_OFFSET;
8138 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8139 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8140 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8141 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8142 				IS_TPC_IDLE(tpc_cfg_sts);
8143 		is_idle &= is_eng_idle;
8144 
8145 		if (mask && !is_eng_idle)
8146 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8147 		if (e)
8148 			hl_engine_data_sprintf(e, fmt, i,
8149 				is_eng_idle ? "Y" : "N",
8150 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8151 	}
8152 
8153 	if (e)
8154 		hl_engine_data_sprintf(e,
8155 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8156 			"---  -------  ------------  ----------  -----------\n");
8157 
8158 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8159 		offset = i * MME_QMAN_OFFSET;
8160 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8161 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8162 
8163 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8164 		is_slave = i % 2;
8165 		if (!is_slave) {
8166 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8167 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8168 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8169 		}
8170 
8171 		is_idle &= is_eng_idle;
8172 
8173 		if (mask && !is_eng_idle)
8174 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8175 		if (e) {
8176 			if (!is_slave)
8177 				hl_engine_data_sprintf(e, fmt, i,
8178 					is_eng_idle ? "Y" : "N",
8179 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8180 			else
8181 				hl_engine_data_sprintf(e, mme_slave_fmt, i,
8182 					is_eng_idle ? "Y" : "N", "-",
8183 					"-", mme_arch_sts);
8184 		}
8185 	}
8186 
8187 	if (e)
8188 		hl_engine_data_sprintf(e,
8189 				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8190 				"---  -------  ------------  ----------\n");
8191 
8192 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8193 		offset = i * NIC_MACRO_QMAN_OFFSET;
8194 		port = 2 * i;
8195 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8196 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8197 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8198 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8199 			is_idle &= is_eng_idle;
8200 
8201 			if (mask && !is_eng_idle)
8202 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8203 			if (e)
8204 				hl_engine_data_sprintf(e, nic_fmt, port,
8205 						is_eng_idle ? "Y" : "N",
8206 						qm_glbl_sts0, qm_cgm_sts);
8207 		}
8208 
8209 		port = 2 * i + 1;
8210 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8211 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8212 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8213 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8214 			is_idle &= is_eng_idle;
8215 
8216 			if (mask && !is_eng_idle)
8217 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8218 			if (e)
8219 				hl_engine_data_sprintf(e, nic_fmt, port,
8220 						is_eng_idle ? "Y" : "N",
8221 						qm_glbl_sts0, qm_cgm_sts);
8222 		}
8223 	}
8224 
8225 	if (e)
8226 		hl_engine_data_sprintf(e, "\n");
8227 
8228 	return is_idle;
8229 }
8230 
8231 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8232 	__acquires(&gaudi->hw_queues_lock)
8233 {
8234 	struct gaudi_device *gaudi = hdev->asic_specific;
8235 
8236 	spin_lock(&gaudi->hw_queues_lock);
8237 }
8238 
8239 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8240 	__releases(&gaudi->hw_queues_lock)
8241 {
8242 	struct gaudi_device *gaudi = hdev->asic_specific;
8243 
8244 	spin_unlock(&gaudi->hw_queues_lock);
8245 }
8246 
8247 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8248 {
8249 	return hdev->pdev->device;
8250 }
8251 
8252 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8253 				size_t max_size)
8254 {
8255 	struct gaudi_device *gaudi = hdev->asic_specific;
8256 
8257 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8258 		return 0;
8259 
8260 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8261 }
8262 
8263 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8264 {
8265 	struct gaudi_device *gaudi = hdev->asic_specific;
8266 
8267 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8268 		return 0;
8269 
8270 	return hl_fw_get_monitor_dump(hdev, data);
8271 }
8272 
8273 /*
8274  * this function should be used only during initialization and/or after reset,
8275  * when there are no active users.
8276  */
8277 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
8278 {
8279 	u64 kernel_timeout;
8280 	u32 status, offset;
8281 	int rc;
8282 
8283 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8284 
8285 	if (hdev->pldm)
8286 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8287 	else
8288 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8289 
8290 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8291 			lower_32_bits(tpc_kernel));
8292 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8293 			upper_32_bits(tpc_kernel));
8294 
8295 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8296 			lower_32_bits(tpc_kernel));
8297 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8298 			upper_32_bits(tpc_kernel));
8299 	/* set a valid LUT pointer, content is of no significance */
8300 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8301 			lower_32_bits(tpc_kernel));
8302 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8303 			upper_32_bits(tpc_kernel));
8304 
8305 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8306 			lower_32_bits(CFG_BASE +
8307 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8308 
8309 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8310 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8311 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8312 	/* wait a bit for the engine to start executing */
8313 	usleep_range(1000, 1500);
8314 
8315 	/* wait until engine has finished executing */
8316 	rc = hl_poll_timeout(
8317 		hdev,
8318 		mmTPC0_CFG_STATUS + offset,
8319 		status,
8320 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8321 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8322 		1000,
8323 		kernel_timeout);
8324 
8325 	if (rc) {
8326 		dev_err(hdev->dev,
8327 			"Timeout while waiting for TPC%d icache prefetch\n",
8328 			tpc_id);
8329 		return -EIO;
8330 	}
8331 
8332 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8333 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8334 
8335 	/* wait a bit for the engine to start executing */
8336 	usleep_range(1000, 1500);
8337 
8338 	/* wait until engine has finished executing */
8339 	rc = hl_poll_timeout(
8340 		hdev,
8341 		mmTPC0_CFG_STATUS + offset,
8342 		status,
8343 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8344 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8345 		1000,
8346 		kernel_timeout);
8347 
8348 	if (rc) {
8349 		dev_err(hdev->dev,
8350 			"Timeout while waiting for TPC%d vector pipe\n",
8351 			tpc_id);
8352 		return -EIO;
8353 	}
8354 
8355 	rc = hl_poll_timeout(
8356 		hdev,
8357 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8358 		status,
8359 		(status == 0),
8360 		1000,
8361 		kernel_timeout);
8362 
8363 	if (rc) {
8364 		dev_err(hdev->dev,
8365 			"Timeout while waiting for TPC%d kernel to execute\n",
8366 			tpc_id);
8367 		return -EIO;
8368 	}
8369 
8370 	return 0;
8371 }
8372 
8373 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8374 		struct hl_ctx *ctx)
8375 {
8376 	struct gaudi_device *gaudi = hdev->asic_specific;
8377 	int min_alloc_order, rc, collective_cb_size;
8378 
8379 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8380 		return 0;
8381 
8382 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8383 							HOST_SPACE_INTERNAL_CB_SZ,
8384 							&hdev->internal_cb_pool_dma_addr,
8385 							GFP_KERNEL | __GFP_ZERO);
8386 
8387 	if (!hdev->internal_cb_pool_virt_addr)
8388 		return -ENOMEM;
8389 
8390 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8391 			sizeof(struct packet_fence);
8392 	min_alloc_order = ilog2(collective_cb_size);
8393 
8394 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8395 	if (!hdev->internal_cb_pool) {
8396 		dev_err(hdev->dev,
8397 			"Failed to create internal CB pool\n");
8398 		rc = -ENOMEM;
8399 		goto free_internal_cb_pool;
8400 	}
8401 
8402 	rc = gen_pool_add(hdev->internal_cb_pool,
8403 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8404 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8405 	if (rc) {
8406 		dev_err(hdev->dev,
8407 			"Failed to add memory to internal CB pool\n");
8408 		rc = -EFAULT;
8409 		goto destroy_internal_cb_pool;
8410 	}
8411 
8412 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8413 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8414 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8415 
8416 	if (!hdev->internal_cb_va_base) {
8417 		rc = -ENOMEM;
8418 		goto destroy_internal_cb_pool;
8419 	}
8420 
8421 	mutex_lock(&hdev->mmu_lock);
8422 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8423 			hdev->internal_cb_pool_dma_addr,
8424 			HOST_SPACE_INTERNAL_CB_SZ);
8425 
8426 	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8427 	mutex_unlock(&hdev->mmu_lock);
8428 
8429 	if (rc)
8430 		goto unreserve_internal_cb_pool;
8431 
8432 	return 0;
8433 
8434 unreserve_internal_cb_pool:
8435 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8436 			HOST_SPACE_INTERNAL_CB_SZ);
8437 destroy_internal_cb_pool:
8438 	gen_pool_destroy(hdev->internal_cb_pool);
8439 free_internal_cb_pool:
8440 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8441 					hdev->internal_cb_pool_dma_addr);
8442 
8443 	return rc;
8444 }
8445 
8446 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8447 		struct hl_ctx *ctx)
8448 {
8449 	struct gaudi_device *gaudi = hdev->asic_specific;
8450 
8451 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8452 		return;
8453 
8454 	mutex_lock(&hdev->mmu_lock);
8455 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8456 			HOST_SPACE_INTERNAL_CB_SZ);
8457 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8458 			HOST_SPACE_INTERNAL_CB_SZ);
8459 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8460 	mutex_unlock(&hdev->mmu_lock);
8461 
8462 	gen_pool_destroy(hdev->internal_cb_pool);
8463 
8464 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8465 					hdev->internal_cb_pool_dma_addr);
8466 }
8467 
8468 static int gaudi_ctx_init(struct hl_ctx *ctx)
8469 {
8470 	int rc;
8471 
8472 	if (ctx->asid == HL_KERNEL_ASID_ID)
8473 		return 0;
8474 
8475 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8476 	if (rc)
8477 		return rc;
8478 
8479 	rc = gaudi_restore_user_registers(ctx->hdev);
8480 	if (rc)
8481 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8482 
8483 	return rc;
8484 }
8485 
8486 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8487 {
8488 	if (ctx->asid == HL_KERNEL_ASID_ID)
8489 		return;
8490 
8491 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8492 }
8493 
8494 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8495 {
8496 	return 0;
8497 }
8498 
8499 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8500 {
8501 	return gaudi_cq_assignment[cq_idx];
8502 }
8503 
8504 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8505 {
8506 	return sizeof(struct packet_msg_short) +
8507 			sizeof(struct packet_msg_prot) * 2;
8508 }
8509 
8510 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8511 {
8512 	return sizeof(struct packet_msg_short) * 4 +
8513 			sizeof(struct packet_fence) +
8514 			sizeof(struct packet_msg_prot) * 2;
8515 }
8516 
8517 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8518 {
8519 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8520 }
8521 
8522 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8523 				u32 size, bool eb)
8524 {
8525 	struct hl_cb *cb = (struct hl_cb *) data;
8526 	struct packet_msg_short *pkt;
8527 	u32 value, ctl, pkt_size = sizeof(*pkt);
8528 
8529 	pkt = cb->kernel_address + size;
8530 	memset(pkt, 0, pkt_size);
8531 
8532 	/* Inc by 1, Mode ADD */
8533 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8534 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8535 
8536 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8537 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8538 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8539 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8540 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8541 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8542 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8543 
8544 	pkt->value = cpu_to_le32(value);
8545 	pkt->ctl = cpu_to_le32(ctl);
8546 
8547 	return size + pkt_size;
8548 }
8549 
8550 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8551 					u16 addr)
8552 {
8553 	u32 ctl, pkt_size = sizeof(*pkt);
8554 
8555 	memset(pkt, 0, pkt_size);
8556 
8557 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8558 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8559 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8560 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8561 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8562 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8563 
8564 	pkt->value = cpu_to_le32(value);
8565 	pkt->ctl = cpu_to_le32(ctl);
8566 
8567 	return pkt_size;
8568 }
8569 
8570 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8571 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8572 		u16 sob_val, u16 mon_id)
8573 {
8574 	u64 monitor_base;
8575 	u32 ctl, value, pkt_size = sizeof(*pkt);
8576 	u16 msg_addr_offset;
8577 	u8 mask;
8578 
8579 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8580 		dev_err(hdev->dev,
8581 			"sob_base %u (mask %#x) is not valid\n",
8582 			sob_base, sob_mask);
8583 		return 0;
8584 	}
8585 
8586 	/*
8587 	 * monitor_base should be the content of the base0 address registers,
8588 	 * so it will be added to the msg short offsets
8589 	 */
8590 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8591 
8592 	msg_addr_offset =
8593 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8594 				monitor_base;
8595 
8596 	memset(pkt, 0, pkt_size);
8597 
8598 	/* Monitor config packet: bind the monitor to a sync object */
8599 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8600 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8601 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8602 			0); /* GREATER OR EQUAL*/
8603 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8604 
8605 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8606 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8607 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8608 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8609 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8610 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8611 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8612 
8613 	pkt->value = cpu_to_le32(value);
8614 	pkt->ctl = cpu_to_le32(ctl);
8615 
8616 	return pkt_size;
8617 }
8618 
8619 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8620 {
8621 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8622 
8623 	memset(pkt, 0, pkt_size);
8624 
8625 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8626 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8627 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8628 
8629 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8630 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8631 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8632 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8633 
8634 	pkt->cfg = cpu_to_le32(cfg);
8635 	pkt->ctl = cpu_to_le32(ctl);
8636 
8637 	return pkt_size;
8638 }
8639 
8640 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8641 {
8642 	u32 offset, nic_index;
8643 
8644 	switch (queue_id) {
8645 	case GAUDI_QUEUE_ID_DMA_0_0:
8646 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8647 		break;
8648 	case GAUDI_QUEUE_ID_DMA_0_1:
8649 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8650 		break;
8651 	case GAUDI_QUEUE_ID_DMA_0_2:
8652 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8653 		break;
8654 	case GAUDI_QUEUE_ID_DMA_0_3:
8655 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8656 		break;
8657 	case GAUDI_QUEUE_ID_DMA_1_0:
8658 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8659 		break;
8660 	case GAUDI_QUEUE_ID_DMA_1_1:
8661 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8662 		break;
8663 	case GAUDI_QUEUE_ID_DMA_1_2:
8664 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8665 		break;
8666 	case GAUDI_QUEUE_ID_DMA_1_3:
8667 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8668 		break;
8669 	case GAUDI_QUEUE_ID_DMA_5_0:
8670 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8671 		break;
8672 	case GAUDI_QUEUE_ID_DMA_5_1:
8673 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8674 		break;
8675 	case GAUDI_QUEUE_ID_DMA_5_2:
8676 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8677 		break;
8678 	case GAUDI_QUEUE_ID_DMA_5_3:
8679 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8680 		break;
8681 	case GAUDI_QUEUE_ID_TPC_7_0:
8682 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8683 		break;
8684 	case GAUDI_QUEUE_ID_TPC_7_1:
8685 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8686 		break;
8687 	case GAUDI_QUEUE_ID_TPC_7_2:
8688 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8689 		break;
8690 	case GAUDI_QUEUE_ID_TPC_7_3:
8691 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8692 		break;
8693 	case GAUDI_QUEUE_ID_NIC_0_0:
8694 	case GAUDI_QUEUE_ID_NIC_1_0:
8695 	case GAUDI_QUEUE_ID_NIC_2_0:
8696 	case GAUDI_QUEUE_ID_NIC_3_0:
8697 	case GAUDI_QUEUE_ID_NIC_4_0:
8698 	case GAUDI_QUEUE_ID_NIC_5_0:
8699 	case GAUDI_QUEUE_ID_NIC_6_0:
8700 	case GAUDI_QUEUE_ID_NIC_7_0:
8701 	case GAUDI_QUEUE_ID_NIC_8_0:
8702 	case GAUDI_QUEUE_ID_NIC_9_0:
8703 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8704 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8705 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8706 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8707 		break;
8708 	case GAUDI_QUEUE_ID_NIC_0_1:
8709 	case GAUDI_QUEUE_ID_NIC_1_1:
8710 	case GAUDI_QUEUE_ID_NIC_2_1:
8711 	case GAUDI_QUEUE_ID_NIC_3_1:
8712 	case GAUDI_QUEUE_ID_NIC_4_1:
8713 	case GAUDI_QUEUE_ID_NIC_5_1:
8714 	case GAUDI_QUEUE_ID_NIC_6_1:
8715 	case GAUDI_QUEUE_ID_NIC_7_1:
8716 	case GAUDI_QUEUE_ID_NIC_8_1:
8717 	case GAUDI_QUEUE_ID_NIC_9_1:
8718 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8719 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8720 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8721 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8722 		break;
8723 	case GAUDI_QUEUE_ID_NIC_0_2:
8724 	case GAUDI_QUEUE_ID_NIC_1_2:
8725 	case GAUDI_QUEUE_ID_NIC_2_2:
8726 	case GAUDI_QUEUE_ID_NIC_3_2:
8727 	case GAUDI_QUEUE_ID_NIC_4_2:
8728 	case GAUDI_QUEUE_ID_NIC_5_2:
8729 	case GAUDI_QUEUE_ID_NIC_6_2:
8730 	case GAUDI_QUEUE_ID_NIC_7_2:
8731 	case GAUDI_QUEUE_ID_NIC_8_2:
8732 	case GAUDI_QUEUE_ID_NIC_9_2:
8733 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8734 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8735 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8736 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8737 		break;
8738 	case GAUDI_QUEUE_ID_NIC_0_3:
8739 	case GAUDI_QUEUE_ID_NIC_1_3:
8740 	case GAUDI_QUEUE_ID_NIC_2_3:
8741 	case GAUDI_QUEUE_ID_NIC_3_3:
8742 	case GAUDI_QUEUE_ID_NIC_4_3:
8743 	case GAUDI_QUEUE_ID_NIC_5_3:
8744 	case GAUDI_QUEUE_ID_NIC_6_3:
8745 	case GAUDI_QUEUE_ID_NIC_7_3:
8746 	case GAUDI_QUEUE_ID_NIC_8_3:
8747 	case GAUDI_QUEUE_ID_NIC_9_3:
8748 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8749 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8750 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8751 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8752 		break;
8753 	default:
8754 		return -EINVAL;
8755 	}
8756 
8757 	*addr = CFG_BASE + offset;
8758 
8759 	return 0;
8760 }
8761 
8762 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8763 {
8764 	u64 monitor_base;
8765 	u32 size = 0;
8766 	u16 msg_addr_offset;
8767 
8768 	/*
8769 	 * monitor_base should be the content of the base0 address registers,
8770 	 * so it will be added to the msg short offsets
8771 	 */
8772 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8773 
8774 	/* First monitor config packet: low address of the sync */
8775 	msg_addr_offset =
8776 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8777 				monitor_base;
8778 
8779 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8780 					msg_addr_offset);
8781 
8782 	/* Second monitor config packet: high address of the sync */
8783 	msg_addr_offset =
8784 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8785 				monitor_base;
8786 
8787 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8788 					msg_addr_offset);
8789 
8790 	/*
8791 	 * Third monitor config packet: the payload, i.e. what to write when the
8792 	 * sync triggers
8793 	 */
8794 	msg_addr_offset =
8795 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8796 				monitor_base;
8797 
8798 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8799 
8800 	return size;
8801 }
8802 
8803 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8804 				struct hl_gen_wait_properties *prop)
8805 {
8806 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8807 	void *buf = cb->kernel_address;
8808 	u64 fence_addr = 0;
8809 	u32 size = prop->size;
8810 
8811 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8812 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8813 				prop->q_idx);
8814 		return 0;
8815 	}
8816 
8817 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8818 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8819 			prop->sob_mask, prop->sob_val, prop->mon_id);
8820 	size += gaudi_add_fence_pkt(buf + size);
8821 
8822 	return size;
8823 }
8824 
8825 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8826 {
8827 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8828 
8829 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8830 		hw_sob->sob_id);
8831 
8832 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8833 			hw_sob->sob_id * 4, 0);
8834 
8835 	kref_init(&hw_sob->kref);
8836 }
8837 
8838 static u64 gaudi_get_device_time(struct hl_device *hdev)
8839 {
8840 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8841 
8842 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8843 }
8844 
8845 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8846 				u32 *block_size, u32 *block_id)
8847 {
8848 	return -EPERM;
8849 }
8850 
8851 static int gaudi_block_mmap(struct hl_device *hdev,
8852 				struct vm_area_struct *vma,
8853 				u32 block_id, u32 block_size)
8854 {
8855 	return -EPERM;
8856 }
8857 
8858 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8859 {
8860 	struct cpu_dyn_regs *dyn_regs =
8861 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8862 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8863 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8864 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8865 
8866 	WREG32(irq_handler_offset,
8867 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8868 }
8869 
8870 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8871 {
8872 	return -EINVAL;
8873 }
8874 
8875 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8876 {
8877 	switch (pll_idx) {
8878 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8879 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8880 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8881 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8882 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8883 	case HL_GAUDI_MME_PLL: return MME_PLL;
8884 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8885 	case HL_GAUDI_IF_PLL: return IF_PLL;
8886 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8887 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8888 	default: return -EINVAL;
8889 	}
8890 }
8891 
8892 static int gaudi_add_sync_to_engine_map_entry(
8893 	struct hl_sync_to_engine_map *map, u32 reg_value,
8894 	enum hl_sync_engine_type engine_type, u32 engine_id)
8895 {
8896 	struct hl_sync_to_engine_map_entry *entry;
8897 
8898 	/* Reg value represents a partial address of sync object,
8899 	 * it is used as unique identifier. For this we need to
8900 	 * clear the cutoff cfg base bits from the value.
8901 	 */
8902 	if (reg_value == 0 || reg_value == 0xffffffff)
8903 		return 0;
8904 	reg_value -= lower_32_bits(CFG_BASE);
8905 
8906 	/* create a new hash entry */
8907 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8908 	if (!entry)
8909 		return -ENOMEM;
8910 	entry->engine_type = engine_type;
8911 	entry->engine_id = engine_id;
8912 	entry->sync_id = reg_value;
8913 	hash_add(map->tb, &entry->node, reg_value);
8914 
8915 	return 0;
8916 }
8917 
8918 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8919 				struct hl_sync_to_engine_map *map)
8920 {
8921 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8922 	int i, j, rc;
8923 	u32 reg_value;
8924 
8925 	/* Iterate over TPC engines */
8926 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8927 
8928 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8929 					sds->props[SP_NEXT_TPC] * i);
8930 
8931 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8932 							ENGINE_TPC, i);
8933 		if (rc)
8934 			goto free_sync_to_engine_map;
8935 	}
8936 
8937 	/* Iterate over MME engines */
8938 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8939 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8940 
8941 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8942 						sds->props[SP_NEXT_MME] * i +
8943 						j * sizeof(u32));
8944 
8945 			rc = gaudi_add_sync_to_engine_map_entry(
8946 				map, reg_value, ENGINE_MME,
8947 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8948 			if (rc)
8949 				goto free_sync_to_engine_map;
8950 		}
8951 	}
8952 
8953 	/* Iterate over DMA engines */
8954 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8955 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8956 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8957 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8958 							ENGINE_DMA, i);
8959 		if (rc)
8960 			goto free_sync_to_engine_map;
8961 	}
8962 
8963 	return 0;
8964 
8965 free_sync_to_engine_map:
8966 	hl_state_dump_free_sync_to_engine_map(map);
8967 
8968 	return rc;
8969 }
8970 
8971 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8972 {
8973 	return FIELD_GET(
8974 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8975 		mon->status);
8976 }
8977 
8978 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8979 {
8980 	const size_t max_write = 10;
8981 	u32 gid, mask, sob;
8982 	int i, offset;
8983 
8984 	/* Sync object ID is calculated as follows:
8985 	 * (8 * group_id + cleared bits in mask)
8986 	 */
8987 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8988 			mon->arm_data);
8989 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8990 			mon->arm_data);
8991 
8992 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8993 		max_write; mask >>= 1, i++) {
8994 		if (!(mask & 1)) {
8995 			sob = gid * MONITOR_MAX_SOBS + i;
8996 
8997 			if (offset > 0)
8998 				offset += snprintf(sobs + offset, max_write,
8999 							", ");
9000 
9001 			offset += snprintf(sobs + offset, max_write, "%u", sob);
9002 		}
9003 	}
9004 }
9005 
9006 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9007 				struct hl_device *hdev,
9008 				struct hl_mon_state_dump *mon)
9009 {
9010 	const char *name;
9011 	char scratch_buf1[BIN_REG_STRING_SIZE],
9012 		scratch_buf2[BIN_REG_STRING_SIZE];
9013 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9014 
9015 	name = hl_state_dump_get_monitor_name(hdev, mon);
9016 	if (!name)
9017 		name = "";
9018 
9019 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9020 
9021 	return hl_snprintf_resize(
9022 		buf, size, offset,
9023 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9024 		mon->id, name,
9025 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9026 				mon->arm_data),
9027 		hl_format_as_binary(
9028 			scratch_buf1, sizeof(scratch_buf1),
9029 			FIELD_GET(
9030 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9031 				mon->arm_data)),
9032 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9033 				mon->arm_data),
9034 		mon->wr_data,
9035 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9036 		hl_format_as_binary(
9037 			scratch_buf2, sizeof(scratch_buf2),
9038 			FIELD_GET(
9039 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9040 				mon->status)),
9041 		monitored_sobs);
9042 }
9043 
9044 
9045 static int gaudi_print_fences_single_engine(
9046 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9047 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9048 	size_t *size, size_t *offset)
9049 {
9050 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9051 	int rc = -ENOMEM, i;
9052 	u32 *statuses, *fences;
9053 
9054 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9055 			sizeof(*statuses), GFP_KERNEL);
9056 	if (!statuses)
9057 		goto out;
9058 
9059 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9060 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
9061 			 sizeof(*fences), GFP_KERNEL);
9062 	if (!fences)
9063 		goto free_status;
9064 
9065 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9066 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9067 
9068 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9069 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9070 		fences[i] = RREG32(base_offset + i * sizeof(u32));
9071 
9072 	/* The actual print */
9073 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9074 		u32 fence_id;
9075 		u64 fence_cnt, fence_rdata;
9076 		const char *engine_name;
9077 
9078 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9079 			statuses[i]))
9080 			continue;
9081 
9082 		fence_id =
9083 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9084 		fence_cnt = base_offset + CFG_BASE +
9085 			sizeof(u32) *
9086 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9087 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9088 				sds->props[SP_FENCE0_RDATA_OFFSET];
9089 		engine_name = hl_sync_engine_to_string(engine_type);
9090 
9091 		rc = hl_snprintf_resize(
9092 			buf, size, offset,
9093 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9094 			engine_name, engine_id,
9095 			i, fence_id,
9096 			fence_cnt, engine_name, engine_id, fence_id, i,
9097 			fence_rdata, engine_name, engine_id, fence_id, i,
9098 			fences[fence_id],
9099 			statuses[i]);
9100 		if (rc)
9101 			goto free_fences;
9102 	}
9103 
9104 	rc = 0;
9105 
9106 free_fences:
9107 	kfree(fences);
9108 free_status:
9109 	kfree(statuses);
9110 out:
9111 	return rc;
9112 }
9113 
9114 
9115 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9116 	.monitor_valid = gaudi_monitor_valid,
9117 	.print_single_monitor = gaudi_print_single_monitor,
9118 	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9119 	.print_fences_single_engine = gaudi_print_fences_single_engine,
9120 };
9121 
9122 static void gaudi_state_dump_init(struct hl_device *hdev)
9123 {
9124 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9125 	int i;
9126 
9127 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9128 		hash_add(sds->so_id_to_str_tb,
9129 			&gaudi_so_id_to_str[i].node,
9130 			gaudi_so_id_to_str[i].id);
9131 
9132 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9133 		hash_add(sds->monitor_id_to_str_tb,
9134 			&gaudi_monitor_id_to_str[i].node,
9135 			gaudi_monitor_id_to_str[i].id);
9136 
9137 	sds->props = gaudi_state_dump_specs_props;
9138 
9139 	sds->sync_namager_names = gaudi_sync_manager_names;
9140 
9141 	sds->funcs = gaudi_state_dump_funcs;
9142 }
9143 
9144 static u32 *gaudi_get_stream_master_qid_arr(void)
9145 {
9146 	return gaudi_stream_master;
9147 }
9148 
9149 static int gaudi_set_dram_properties(struct hl_device *hdev)
9150 {
9151 	return 0;
9152 }
9153 
9154 static int gaudi_set_binning_masks(struct hl_device *hdev)
9155 {
9156 	return 0;
9157 }
9158 
9159 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9160 {
9161 }
9162 
9163 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9164 {
9165 	struct hl_device *hdev = dev_get_drvdata(dev);
9166 	struct cpucp_info *cpucp_info;
9167 
9168 	cpucp_info = &hdev->asic_prop.cpucp_info;
9169 
9170 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9171 }
9172 
9173 static DEVICE_ATTR_RO(infineon_ver);
9174 
9175 static struct attribute *gaudi_vrm_dev_attrs[] = {
9176 	&dev_attr_infineon_ver.attr,
9177 	NULL,
9178 };
9179 
9180 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9181 					struct attribute_group *dev_vrm_attr_grp)
9182 {
9183 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9184 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9185 }
9186 
9187 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9188 {
9189 	return 0;
9190 }
9191 
9192 static const struct hl_asic_funcs gaudi_funcs = {
9193 	.early_init = gaudi_early_init,
9194 	.early_fini = gaudi_early_fini,
9195 	.late_init = gaudi_late_init,
9196 	.late_fini = gaudi_late_fini,
9197 	.sw_init = gaudi_sw_init,
9198 	.sw_fini = gaudi_sw_fini,
9199 	.hw_init = gaudi_hw_init,
9200 	.hw_fini = gaudi_hw_fini,
9201 	.halt_engines = gaudi_halt_engines,
9202 	.suspend = gaudi_suspend,
9203 	.resume = gaudi_resume,
9204 	.mmap = gaudi_mmap,
9205 	.ring_doorbell = gaudi_ring_doorbell,
9206 	.pqe_write = gaudi_pqe_write,
9207 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9208 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9209 	.scrub_device_mem = gaudi_scrub_device_mem,
9210 	.scrub_device_dram = gaudi_scrub_device_dram,
9211 	.get_int_queue_base = gaudi_get_int_queue_base,
9212 	.test_queues = gaudi_test_queues,
9213 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9214 	.asic_dma_pool_free = gaudi_dma_pool_free,
9215 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9216 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9217 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9218 	.cs_parser = gaudi_cs_parser,
9219 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
9220 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9221 	.update_eq_ci = gaudi_update_eq_ci,
9222 	.context_switch = gaudi_context_switch,
9223 	.restore_phase_topology = gaudi_restore_phase_topology,
9224 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9225 	.add_device_attr = gaudi_add_device_attr,
9226 	.handle_eqe = gaudi_handle_eqe,
9227 	.get_events_stat = gaudi_get_events_stat,
9228 	.read_pte = gaudi_read_pte,
9229 	.write_pte = gaudi_write_pte,
9230 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9231 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9232 	.mmu_prefetch_cache_range = NULL,
9233 	.send_heartbeat = gaudi_send_heartbeat,
9234 	.debug_coresight = gaudi_debug_coresight,
9235 	.is_device_idle = gaudi_is_device_idle,
9236 	.compute_reset_late_init = gaudi_compute_reset_late_init,
9237 	.hw_queues_lock = gaudi_hw_queues_lock,
9238 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9239 	.get_pci_id = gaudi_get_pci_id,
9240 	.get_eeprom_data = gaudi_get_eeprom_data,
9241 	.get_monitor_dump = gaudi_get_monitor_dump,
9242 	.send_cpu_message = gaudi_send_cpu_message,
9243 	.pci_bars_map = gaudi_pci_bars_map,
9244 	.init_iatu = gaudi_init_iatu,
9245 	.rreg = hl_rreg,
9246 	.wreg = hl_wreg,
9247 	.halt_coresight = gaudi_halt_coresight,
9248 	.ctx_init = gaudi_ctx_init,
9249 	.ctx_fini = gaudi_ctx_fini,
9250 	.pre_schedule_cs = gaudi_pre_schedule_cs,
9251 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9252 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9253 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9254 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9255 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9256 	.gen_signal_cb = gaudi_gen_signal_cb,
9257 	.gen_wait_cb = gaudi_gen_wait_cb,
9258 	.reset_sob = gaudi_reset_sob,
9259 	.reset_sob_group = gaudi_reset_sob_group,
9260 	.get_device_time = gaudi_get_device_time,
9261 	.pb_print_security_errors = NULL,
9262 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9263 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9264 	.get_dec_base_addr = NULL,
9265 	.scramble_addr = hl_mmu_scramble_addr,
9266 	.descramble_addr = hl_mmu_descramble_addr,
9267 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9268 	.get_hw_block_id = gaudi_get_hw_block_id,
9269 	.hw_block_mmap = gaudi_block_mmap,
9270 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9271 	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9272 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9273 	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9274 	.init_firmware_loader = gaudi_init_firmware_loader,
9275 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9276 	.state_dump_init = gaudi_state_dump_init,
9277 	.get_sob_addr = gaudi_get_sob_addr,
9278 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9279 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9280 	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9281 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9282 	.access_dev_mem = hl_access_dev_mem,
9283 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9284 	.send_device_activity = gaudi_send_device_activity,
9285 	.set_dram_properties = gaudi_set_dram_properties,
9286 	.set_binning_masks = gaudi_set_binning_masks,
9287 };
9288 
9289 /**
9290  * gaudi_set_asic_funcs - set GAUDI function pointers
9291  *
9292  * @hdev: pointer to hl_device structure
9293  *
9294  */
9295 void gaudi_set_asic_funcs(struct hl_device *hdev)
9296 {
9297 	hdev->asic_funcs = &gaudi_funcs;
9298 }
9299