xref: /openbmc/linux/drivers/accel/habanalabs/gaudi/gaudi.c (revision f4356947f0297b0962fdd197672db7edf9f58be6)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is always not
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
/* Firmware file names */
#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

/* Block size of the DMA pool */
#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

/* Reset and queue-test timing; the unit is the suffix of each name */
#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

/* PLDM counterparts of the timing values, plus other long timeouts */
#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

/* Fence value used with QMAN0 */
#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9

/* Length of each entry in gaudi_irq_name[] */
#define GAUDI_MAX_STRING_LEN		20

/* Command buffer pool geometry */
#define GAUDI_CB_POOL_CB_CNT		512
#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

/* Number of entries in the error-cause string tables below */
#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
#define GAUDI_NUM_OF_QM_ERR_CAUSE	16
#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0xEE6B27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

/* Room for "0b" + 32 binary digits + the terminating NUL */
#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256
105 
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107 	GAUDI_QUEUE_ID_DMA_0_0,
108 	GAUDI_QUEUE_ID_DMA_0_1,
109 	GAUDI_QUEUE_ID_DMA_0_2,
110 	GAUDI_QUEUE_ID_DMA_0_3,
111 	GAUDI_QUEUE_ID_DMA_1_0,
112 	GAUDI_QUEUE_ID_DMA_1_1,
113 	GAUDI_QUEUE_ID_DMA_1_2,
114 	GAUDI_QUEUE_ID_DMA_1_3
115 };
116 
117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
118 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
119 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
120 		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
121 		"gaudi cpu eq"
122 };
123 
124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
125 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
126 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
127 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
128 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
129 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
130 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
131 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
132 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
133 };
134 
135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
136 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
137 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
138 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
139 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
140 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
141 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
142 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
143 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
144 };
145 
146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
147 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
148 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
149 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
150 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
151 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
152 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
153 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
154 	[PACKET_FENCE]		= sizeof(struct packet_fence),
155 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
156 	[PACKET_NOP]		= sizeof(struct packet_nop),
157 	[PACKET_STOP]		= sizeof(struct packet_stop),
158 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
159 	[PACKET_WAIT]		= sizeof(struct packet_wait),
160 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
161 };
162 
163 static inline bool validate_packet_id(enum packet_id id)
164 {
165 	switch (id) {
166 	case PACKET_WREG_32:
167 	case PACKET_WREG_BULK:
168 	case PACKET_MSG_LONG:
169 	case PACKET_MSG_SHORT:
170 	case PACKET_CP_DMA:
171 	case PACKET_REPEAT:
172 	case PACKET_MSG_PROT:
173 	case PACKET_FENCE:
174 	case PACKET_LIN_DMA:
175 	case PACKET_NOP:
176 	case PACKET_STOP:
177 	case PACKET_ARB_POINT:
178 	case PACKET_WAIT:
179 	case PACKET_LOAD_AND_EXE:
180 		return true;
181 	default:
182 		return false;
183 	}
184 }
185 
186 static const char * const
187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
188 	"tpc_address_exceed_slm",
189 	"tpc_div_by_0",
190 	"tpc_spu_mac_overflow",
191 	"tpc_spu_addsub_overflow",
192 	"tpc_spu_abs_overflow",
193 	"tpc_spu_fp_dst_nan_inf",
194 	"tpc_spu_fp_dst_denorm",
195 	"tpc_vpu_mac_overflow",
196 	"tpc_vpu_addsub_overflow",
197 	"tpc_vpu_abs_overflow",
198 	"tpc_vpu_fp_dst_nan_inf",
199 	"tpc_vpu_fp_dst_denorm",
200 	"tpc_assertions",
201 	"tpc_illegal_instruction",
202 	"tpc_pc_wrap_around",
203 	"tpc_qm_sw_err",
204 	"tpc_hbw_rresp_err",
205 	"tpc_hbw_bresp_err",
206 	"tpc_lbw_rresp_err",
207 	"tpc_lbw_bresp_err"
208 };
209 
210 static const char * const
211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
212 	"PQ AXI HBW error",
213 	"CQ AXI HBW error",
214 	"CP AXI HBW error",
215 	"CP error due to undefined OPCODE",
216 	"CP encountered STOP OPCODE",
217 	"CP AXI LBW error",
218 	"CP WRREG32 or WRBULK returned error",
219 	"N/A",
220 	"FENCE 0 inc over max value and clipped",
221 	"FENCE 1 inc over max value and clipped",
222 	"FENCE 2 inc over max value and clipped",
223 	"FENCE 3 inc over max value and clipped",
224 	"FENCE 0 dec under min value and clipped",
225 	"FENCE 1 dec under min value and clipped",
226 	"FENCE 2 dec under min value and clipped",
227 	"FENCE 3 dec under min value and clipped"
228 };
229 
230 static const char * const
231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
232 	"Choice push while full error",
233 	"Choice Q watchdog error",
234 	"MSG AXI LBW returned with error"
235 };
236 
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352 
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354 	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355 	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356 	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357 	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358 	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359 	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360 	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361 	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362 	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363 	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364 	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365 	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366 	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367 	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368 	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369 	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370 	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371 	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372 	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373 	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374 	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375 	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376 	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377 	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378 	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379 	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380 	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382 
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384 	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385 	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
386 	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387 	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388 	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389 	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390 	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391 	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392 	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393 	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394 	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396 
397 static s64 gaudi_state_dump_specs_props[] = {
398 	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399 	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400 	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401 	[SP_MON_OBJ_WR_ADDR_LOW] =
402 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403 	[SP_MON_OBJ_WR_ADDR_HIGH] =
404 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405 	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406 	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407 	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408 	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409 	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410 	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411 	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412 	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413 	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414 	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415 	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416 	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417 	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418 	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419 	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420 	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421 	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422 	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423 	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424 	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425 	[SP_FENCE0_CNT_OFFSET] =
426 		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427 	[SP_FENCE0_RDATA_OFFSET] =
428 		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429 	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430 	[SP_NUM_CORES] = 1,
431 };
432 
433 static const int gaudi_queue_id_to_engine_id[] = {
434 	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
435 	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
436 	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
437 	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
438 	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
439 	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
440 	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
441 	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
442 	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
443 	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
444 	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
445 	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
446 	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
447 	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
448 	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
449 	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
450 	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
451 	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
452 	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
453 	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
454 	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
455 	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
456 	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
457 	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
458 	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
459 	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
460 	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
461 	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
462 	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
463 };
464 
465 /* The order here is opposite to the order of the indexing in the h/w.
466  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
467  */
468 static const char * const gaudi_sync_manager_names[] = {
469 	"SYNC_MGR_E_N",
470 	"SYNC_MGR_W_N",
471 	"SYNC_MGR_E_S",
472 	"SYNC_MGR_W_S",
473 	NULL
474 };
475 
476 struct ecc_info_extract_params {
477 	u64 block_address;
478 	u32 num_memories;
479 	bool derr;
480 };
481 
482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
483 								u64 phys_addr);
484 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
485 					struct hl_cs_job *job);
486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
487 					u32 size, u64 val);
488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
489 					u32 num_regs, u32 val);
490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
491 				u32 tpc_id);
492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
493 static int gaudi_cpucp_info_get(struct hl_device *hdev);
494 static void gaudi_disable_clock_gating(struct hl_device *hdev);
495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
497 				u32 size, bool eb);
498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
499 				struct hl_gen_wait_properties *prop);
500 static inline enum hl_collective_mode
501 get_collective_mode(struct hl_device *hdev, u32 queue_id)
502 {
503 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
504 		return HL_COLLECTIVE_MASTER;
505 
506 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
507 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
508 		return HL_COLLECTIVE_SLAVE;
509 
510 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
511 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
512 		return HL_COLLECTIVE_SLAVE;
513 
514 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
515 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
516 		return HL_COLLECTIVE_SLAVE;
517 
518 	return HL_COLLECTIVE_NOT_SUPPORTED;
519 }
520 
521 static inline void set_default_power_values(struct hl_device *hdev)
522 {
523 	struct asic_fixed_properties *prop = &hdev->asic_prop;
524 
525 	if (hdev->card_type == cpucp_card_type_pmc) {
526 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
527 
528 		if (prop->fw_security_enabled)
529 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
530 		else
531 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
532 	} else {
533 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
534 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
535 	}
536 }
537 
538 static int gaudi_set_fixed_properties(struct hl_device *hdev)
539 {
540 	struct asic_fixed_properties *prop = &hdev->asic_prop;
541 	u32 num_sync_stream_queues = 0;
542 	int i;
543 
544 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
545 	prop->hw_queues_props = kcalloc(prop->max_queues,
546 			sizeof(struct hw_queue_properties),
547 			GFP_KERNEL);
548 
549 	if (!prop->hw_queues_props)
550 		return -ENOMEM;
551 
552 	for (i = 0 ; i < prop->max_queues ; i++) {
553 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
554 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
555 			prop->hw_queues_props[i].driver_only = 0;
556 			prop->hw_queues_props[i].supports_sync_stream = 1;
557 			prop->hw_queues_props[i].cb_alloc_flags =
558 				CB_ALLOC_KERNEL;
559 			num_sync_stream_queues++;
560 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
561 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
562 			prop->hw_queues_props[i].driver_only = 1;
563 			prop->hw_queues_props[i].supports_sync_stream = 0;
564 			prop->hw_queues_props[i].cb_alloc_flags =
565 				CB_ALLOC_KERNEL;
566 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
567 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
568 			prop->hw_queues_props[i].driver_only = 0;
569 			prop->hw_queues_props[i].supports_sync_stream = 0;
570 			prop->hw_queues_props[i].cb_alloc_flags =
571 				CB_ALLOC_USER;
572 
573 		}
574 		prop->hw_queues_props[i].collective_mode =
575 						get_collective_mode(hdev, i);
576 	}
577 
578 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
579 	prop->cfg_base_address = CFG_BASE;
580 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
581 	prop->host_base_address = HOST_PHYS_BASE;
582 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
583 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
584 	prop->completion_mode = HL_COMPLETION_MODE_JOB;
585 	prop->collective_first_sob = 0;
586 	prop->collective_first_mon = 0;
587 
588 	/* 2 SOBs per internal queue stream are reserved for collective */
589 	prop->sync_stream_first_sob =
590 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
591 			* QMAN_STREAMS * HL_RSVD_SOBS;
592 
593 	/* 1 monitor per internal queue stream are reserved for collective
594 	 * 2 monitors per external queue stream are reserved for collective
595 	 */
596 	prop->sync_stream_first_mon =
597 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
598 			(NUMBER_OF_EXT_HW_QUEUES * 2);
599 
600 	prop->dram_base_address = DRAM_PHYS_BASE;
601 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
602 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
603 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
604 
605 	prop->sram_base_address = SRAM_BASE_ADDR;
606 	prop->sram_size = SRAM_SIZE;
607 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
608 	prop->sram_user_base_address =
609 			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
610 
611 	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
612 	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
613 
614 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
615 	if (hdev->pldm)
616 		prop->mmu_pgt_size = 0x800000; /* 8MB */
617 	else
618 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
619 	prop->mmu_pte_size = HL_PTE_SIZE;
620 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
621 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
622 	prop->dram_page_size = PAGE_SIZE_2MB;
623 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
624 	prop->dram_supports_virtual_memory = false;
625 
626 	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
627 	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
628 	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
629 	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
630 	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
631 	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
632 	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
633 	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
634 	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
635 	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
636 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
637 	prop->pmmu.end_addr =
638 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
639 	prop->pmmu.page_size = PAGE_SIZE_4KB;
640 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
641 	prop->pmmu.last_mask = LAST_MASK;
642 	/* TODO: will be duplicated until implementing per-MMU props */
643 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
644 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
645 
646 	/* PMMU and HPMMU are the same except of page size */
647 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
648 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
649 
650 	/* shifts and masks are the same in PMMU and DMMU */
651 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
652 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
653 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
654 	prop->dmmu.page_size = PAGE_SIZE_2MB;
655 
656 	prop->cfg_size = CFG_SIZE;
657 	prop->max_asid = MAX_ASID;
658 	prop->num_of_events = GAUDI_EVENT_SIZE;
659 	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
660 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
661 
662 	set_default_power_values(hdev);
663 
664 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
665 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
666 
667 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
668 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
669 
670 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
671 					CARD_NAME_MAX_LEN);
672 
673 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
674 
675 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
676 			prop->sync_stream_first_sob +
677 			(num_sync_stream_queues * HL_RSVD_SOBS);
678 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
679 			prop->sync_stream_first_mon +
680 			(num_sync_stream_queues * HL_RSVD_MONS);
681 
682 	prop->first_available_user_interrupt = USHRT_MAX;
683 	prop->tpc_interrupt_id = USHRT_MAX;
684 
685 	/* single msi */
686 	prop->eq_interrupt_id = 0;
687 
688 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
689 		prop->first_available_cq[i] = USHRT_MAX;
690 
691 	prop->fw_cpu_boot_dev_sts0_valid = false;
692 	prop->fw_cpu_boot_dev_sts1_valid = false;
693 	prop->hard_reset_done_by_fw = false;
694 	prop->gic_interrupts_enable = true;
695 
696 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
697 
698 	prop->clk_pll_index = HL_GAUDI_MME_PLL;
699 	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
700 
701 	prop->use_get_power_for_reset_history = true;
702 
703 	prop->configurable_stop_on_err = true;
704 
705 	prop->set_max_power_on_device_init = true;
706 
707 	prop->dma_mask = 48;
708 
709 	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
710 
711 	return 0;
712 }
713 
714 static int gaudi_pci_bars_map(struct hl_device *hdev)
715 {
716 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
717 	bool is_wc[3] = {false, false, true};
718 	int rc;
719 
720 	rc = hl_pci_bars_map(hdev, name, is_wc);
721 	if (rc)
722 		return rc;
723 
724 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
725 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
726 
727 	return 0;
728 }
729 
730 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
731 {
732 	struct gaudi_device *gaudi = hdev->asic_specific;
733 	struct hl_inbound_pci_region pci_region;
734 	u64 old_addr = addr;
735 	int rc;
736 
737 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
738 		return old_addr;
739 
740 	if (hdev->asic_prop.iatu_done_by_fw)
741 		return U64_MAX;
742 
743 	/* Inbound Region 2 - Bar 4 - Point to HBM */
744 	pci_region.mode = PCI_BAR_MATCH_MODE;
745 	pci_region.bar = HBM_BAR_ID;
746 	pci_region.addr = addr;
747 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
748 	if (rc)
749 		return U64_MAX;
750 
751 	if (gaudi) {
752 		old_addr = gaudi->hbm_bar_cur_addr;
753 		gaudi->hbm_bar_cur_addr = addr;
754 	}
755 
756 	return old_addr;
757 }
758 
759 static int gaudi_init_iatu(struct hl_device *hdev)
760 {
761 	struct hl_inbound_pci_region inbound_region;
762 	struct hl_outbound_pci_region outbound_region;
763 	int rc;
764 
765 	if (hdev->asic_prop.iatu_done_by_fw)
766 		return 0;
767 
768 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
769 	inbound_region.mode = PCI_BAR_MATCH_MODE;
770 	inbound_region.bar = SRAM_BAR_ID;
771 	inbound_region.addr = SRAM_BASE_ADDR;
772 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
773 	if (rc)
774 		goto done;
775 
776 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
777 	inbound_region.mode = PCI_BAR_MATCH_MODE;
778 	inbound_region.bar = CFG_BAR_ID;
779 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
780 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
781 	if (rc)
782 		goto done;
783 
784 	/* Inbound Region 2 - Bar 4 - Point to HBM */
785 	inbound_region.mode = PCI_BAR_MATCH_MODE;
786 	inbound_region.bar = HBM_BAR_ID;
787 	inbound_region.addr = DRAM_PHYS_BASE;
788 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
789 	if (rc)
790 		goto done;
791 
792 	/* Outbound Region 0 - Point to Host */
793 	outbound_region.addr = HOST_PHYS_BASE;
794 	outbound_region.size = HOST_PHYS_SIZE;
795 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
796 
797 done:
798 	return rc;
799 }
800 
801 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
802 {
803 	return RREG32(mmHW_STATE);
804 }
805 
806 static int gaudi_early_init(struct hl_device *hdev)
807 {
808 	struct asic_fixed_properties *prop = &hdev->asic_prop;
809 	struct pci_dev *pdev = hdev->pdev;
810 	resource_size_t pci_bar_size;
811 	u32 fw_boot_status;
812 	int rc;
813 
814 	rc = gaudi_set_fixed_properties(hdev);
815 	if (rc) {
816 		dev_err(hdev->dev, "Failed setting fixed properties\n");
817 		return rc;
818 	}
819 
820 	/* Check BAR sizes */
821 	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
822 
823 	if (pci_bar_size != SRAM_BAR_SIZE) {
824 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
825 			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
826 		rc = -ENODEV;
827 		goto free_queue_props;
828 	}
829 
830 	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
831 
832 	if (pci_bar_size != CFG_BAR_SIZE) {
833 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
834 			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
835 		rc = -ENODEV;
836 		goto free_queue_props;
837 	}
838 
839 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
840 	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
841 
842 	/* If FW security is enabled at this point it means no access to ELBI */
843 	if (hdev->asic_prop.fw_security_enabled) {
844 		hdev->asic_prop.iatu_done_by_fw = true;
845 
846 		/*
847 		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
848 		 * decision can only be taken based on PCI ID security.
849 		 */
850 		hdev->asic_prop.gic_interrupts_enable = false;
851 		goto pci_init;
852 	}
853 
854 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
855 				&fw_boot_status);
856 	if (rc)
857 		goto free_queue_props;
858 
859 	/* Check whether FW is configuring iATU */
860 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
861 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
862 		hdev->asic_prop.iatu_done_by_fw = true;
863 
864 pci_init:
865 	rc = hl_pci_init(hdev);
866 	if (rc)
867 		goto free_queue_props;
868 
869 	/* Before continuing in the initialization, we need to read the preboot
870 	 * version to determine whether we run with a security-enabled firmware
871 	 */
872 	rc = hl_fw_read_preboot_status(hdev);
873 	if (rc) {
874 		if (hdev->reset_on_preboot_fail)
875 			/* we are already on failure flow, so don't check if hw_fini fails. */
876 			hdev->asic_funcs->hw_fini(hdev, true, false);
877 		goto pci_fini;
878 	}
879 
880 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
881 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
882 		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
883 		if (rc) {
884 			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
885 			goto pci_fini;
886 		}
887 	}
888 
889 	return 0;
890 
891 pci_fini:
892 	hl_pci_fini(hdev);
893 free_queue_props:
894 	kfree(hdev->asic_prop.hw_queues_props);
895 	return rc;
896 }
897 
898 static int gaudi_early_fini(struct hl_device *hdev)
899 {
900 	kfree(hdev->asic_prop.hw_queues_props);
901 	hl_pci_fini(hdev);
902 
903 	return 0;
904 }
905 
906 /**
907  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
908  *
909  * @hdev: pointer to hl_device structure
910  *
911  */
912 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
913 {
914 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
915 	struct asic_fixed_properties *prop = &hdev->asic_prop;
916 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
917 	int rc;
918 
919 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
920 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
921 		struct gaudi_device *gaudi = hdev->asic_specific;
922 
923 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
924 			return 0;
925 
926 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
927 
928 		if (rc)
929 			return rc;
930 
931 		freq = pll_freq_arr[2];
932 	} else {
933 		/* Backward compatibility */
934 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
935 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
936 		nr = RREG32(mmPSOC_CPU_PLL_NR);
937 		nf = RREG32(mmPSOC_CPU_PLL_NF);
938 		od = RREG32(mmPSOC_CPU_PLL_OD);
939 
940 		if (div_sel == DIV_SEL_REF_CLK ||
941 				div_sel == DIV_SEL_DIVIDED_REF) {
942 			if (div_sel == DIV_SEL_REF_CLK)
943 				freq = PLL_REF_CLK;
944 			else
945 				freq = PLL_REF_CLK / (div_fctr + 1);
946 		} else if (div_sel == DIV_SEL_PLL_CLK ||
947 			div_sel == DIV_SEL_DIVIDED_PLL) {
948 			pll_clk = PLL_REF_CLK * (nf + 1) /
949 					((nr + 1) * (od + 1));
950 			if (div_sel == DIV_SEL_PLL_CLK)
951 				freq = pll_clk;
952 			else
953 				freq = pll_clk / (div_fctr + 1);
954 		} else {
955 			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
956 			freq = 0;
957 		}
958 	}
959 
960 	prop->psoc_timestamp_frequency = freq;
961 	prop->psoc_pci_pll_nr = nr;
962 	prop->psoc_pci_pll_nf = nf;
963 	prop->psoc_pci_pll_od = od;
964 	prop->psoc_pci_pll_div_factor = div_fctr;
965 
966 	return 0;
967 }
968 
969 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
970 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
971 {
972 	struct asic_fixed_properties *prop = &hdev->asic_prop;
973 	struct packet_lin_dma *init_tpc_mem_pkt;
974 	struct hl_cs_job *job;
975 	struct hl_cb *cb;
976 	u64 dst_addr;
977 	u32 cb_size, ctl;
978 	u8 tpc_id;
979 	int rc;
980 
981 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
982 	if (!cb)
983 		return -EFAULT;
984 
985 	init_tpc_mem_pkt = cb->kernel_address;
986 	cb_size = sizeof(*init_tpc_mem_pkt);
987 	memset(init_tpc_mem_pkt, 0, cb_size);
988 
989 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
990 
991 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
992 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
993 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
994 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
995 
996 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
997 
998 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
999 
1000 	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
1001 	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
1002 				round_up(prop->sram_user_base_address, SZ_8K));
1003 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
1004 
1005 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
1006 	if (!job) {
1007 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1008 		rc = -ENOMEM;
1009 		goto release_cb;
1010 	}
1011 
1012 	job->id = 0;
1013 	job->user_cb = cb;
1014 	atomic_inc(&job->user_cb->cs_cnt);
1015 	job->user_cb_size = cb_size;
1016 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1017 	job->patched_cb = job->user_cb;
1018 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1019 
1020 	hl_debugfs_add_job(hdev, job);
1021 
1022 	rc = gaudi_send_job_on_qman0(hdev, job);
1023 
1024 	if (rc)
1025 		goto free_job;
1026 
1027 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1028 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1029 		if (rc)
1030 			break;
1031 	}
1032 
1033 free_job:
1034 	hl_userptr_delete_list(hdev, &job->userptr_list);
1035 	hl_debugfs_remove_job(hdev, job);
1036 	kfree(job);
1037 	atomic_dec(&cb->cs_cnt);
1038 
1039 release_cb:
1040 	hl_cb_put(cb);
1041 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1042 
1043 	return rc;
1044 }
1045 
1046 /*
1047  * gaudi_init_tpc_mem() - Initialize TPC memories.
1048  * @hdev: Pointer to hl_device structure.
1049  *
1050  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1051  *
1052  * Return: 0 for success, negative value for error.
1053  */
1054 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1055 {
1056 	const struct firmware *fw;
1057 	size_t fw_size;
1058 	void *cpu_addr;
1059 	dma_addr_t dma_handle;
1060 	int rc, count = 5;
1061 
1062 again:
1063 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1064 	if (rc == -EINTR && count-- > 0) {
1065 		msleep(50);
1066 		goto again;
1067 	}
1068 
1069 	if (rc) {
1070 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1071 				GAUDI_TPC_FW_FILE);
1072 		goto out;
1073 	}
1074 
1075 	fw_size = fw->size;
1076 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1077 	if (!cpu_addr) {
1078 		dev_err(hdev->dev,
1079 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1080 			fw_size);
1081 		rc = -ENOMEM;
1082 		goto out;
1083 	}
1084 
1085 	memcpy(cpu_addr, fw->data, fw_size);
1086 
1087 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1088 
1089 	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1090 
1091 out:
1092 	release_firmware(fw);
1093 	return rc;
1094 }
1095 
1096 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1097 {
1098 	struct gaudi_device *gaudi = hdev->asic_specific;
1099 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1100 	struct hl_hw_queue *q;
1101 	u32 i, sob_id, sob_group_id, queue_id;
1102 
1103 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1104 	sob_group_id =
1105 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1106 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1107 
1108 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1109 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1110 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1111 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1112 	}
1113 
1114 	/* Both DMA5 and TPC7 use the same resources since only a single
1115 	 * engine need to participate in the reduction process
1116 	 */
1117 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1118 	q = &hdev->kernel_queues[queue_id];
1119 	q->sync_stream_prop.collective_sob_id =
1120 			sob_id + NIC_NUMBER_OF_ENGINES;
1121 
1122 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1123 	q = &hdev->kernel_queues[queue_id];
1124 	q->sync_stream_prop.collective_sob_id =
1125 			sob_id + NIC_NUMBER_OF_ENGINES;
1126 }
1127 
1128 static void gaudi_sob_group_hw_reset(struct kref *ref)
1129 {
1130 	struct gaudi_hw_sob_group *hw_sob_group =
1131 		container_of(ref, struct gaudi_hw_sob_group, kref);
1132 	struct hl_device *hdev = hw_sob_group->hdev;
1133 	int i;
1134 
1135 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1136 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1137 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1138 
1139 	kref_init(&hw_sob_group->kref);
1140 }
1141 
1142 static void gaudi_sob_group_reset_error(struct kref *ref)
1143 {
1144 	struct gaudi_hw_sob_group *hw_sob_group =
1145 		container_of(ref, struct gaudi_hw_sob_group, kref);
1146 	struct hl_device *hdev = hw_sob_group->hdev;
1147 
1148 	dev_crit(hdev->dev,
1149 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1150 		hw_sob_group->base_sob_id);
1151 }
1152 
1153 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1154 {
1155 	struct gaudi_collective_properties *prop;
1156 	int i;
1157 
1158 	prop = &gaudi->collective_props;
1159 
1160 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1161 
1162 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1163 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1164 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1165 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1166 	/* Set collective engine bit */
1167 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1168 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1169 }
1170 
1171 static int gaudi_collective_init(struct hl_device *hdev)
1172 {
1173 	u32 i, sob_id, reserved_sobs_per_group;
1174 	struct gaudi_collective_properties *prop;
1175 	struct gaudi_device *gaudi;
1176 
1177 	gaudi = hdev->asic_specific;
1178 	prop = &gaudi->collective_props;
1179 	sob_id = hdev->asic_prop.collective_first_sob;
1180 
1181 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1182 	reserved_sobs_per_group =
1183 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1184 
1185 	/* Init SOB groups */
1186 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1187 		prop->hw_sob_group[i].hdev = hdev;
1188 		prop->hw_sob_group[i].base_sob_id = sob_id;
1189 		sob_id += reserved_sobs_per_group;
1190 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1191 	}
1192 
1193 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1194 		prop->next_sob_group_val[i] = 1;
1195 		prop->curr_sob_group_idx[i] = 0;
1196 		gaudi_collective_map_sobs(hdev, i);
1197 	}
1198 
1199 	gaudi_collective_mstr_sob_mask_set(gaudi);
1200 
1201 	return 0;
1202 }
1203 
1204 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1205 {
1206 	struct gaudi_device *gaudi = hdev->asic_specific;
1207 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1208 
1209 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1210 					gaudi_sob_group_hw_reset);
1211 }
1212 
1213 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1214 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1215 {
1216 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1217 	struct gaudi_collective_properties *cprop;
1218 	struct hl_gen_wait_properties wait_prop;
1219 	struct hl_sync_stream_properties *prop;
1220 	struct gaudi_device *gaudi;
1221 
1222 	gaudi = hdev->asic_specific;
1223 	cprop = &gaudi->collective_props;
1224 	queue_id = job->hw_queue_id;
1225 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1226 
1227 	master_sob_base =
1228 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1229 	master_monitor = prop->collective_mstr_mon_id[0];
1230 
1231 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1232 
1233 	dev_dbg(hdev->dev,
1234 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1235 		master_sob_base, cprop->mstr_sob_mask[0],
1236 		cprop->next_sob_group_val[stream],
1237 		master_monitor, queue_id);
1238 
1239 	wait_prop.data = (void *) job->patched_cb;
1240 	wait_prop.sob_base = master_sob_base;
1241 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1242 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1243 	wait_prop.mon_id = master_monitor;
1244 	wait_prop.q_idx = queue_id;
1245 	wait_prop.size = cb_size;
1246 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1247 
1248 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1249 	master_monitor = prop->collective_mstr_mon_id[1];
1250 
1251 	dev_dbg(hdev->dev,
1252 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1253 		master_sob_base, cprop->mstr_sob_mask[1],
1254 		cprop->next_sob_group_val[stream],
1255 		master_monitor, queue_id);
1256 
1257 	wait_prop.sob_base = master_sob_base;
1258 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1259 	wait_prop.mon_id = master_monitor;
1260 	wait_prop.size = cb_size;
1261 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1262 }
1263 
1264 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1265 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1266 {
1267 	struct hl_gen_wait_properties wait_prop;
1268 	struct hl_sync_stream_properties *prop;
1269 	u32 queue_id, cb_size = 0;
1270 
1271 	queue_id = job->hw_queue_id;
1272 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1273 
1274 	if (job->cs->encaps_signals) {
1275 		/* use the encaps signal handle store earlier in the flow
1276 		 * and set the SOB information from the encaps
1277 		 * signals handle
1278 		 */
1279 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1280 						cs_cmpl);
1281 
1282 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1283 				job->cs->sequence,
1284 				cs_cmpl->hw_sob->sob_id,
1285 				cs_cmpl->sob_val);
1286 	}
1287 
1288 	/* Add to wait CBs using slave monitor */
1289 	wait_prop.data = (void *) job->user_cb;
1290 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1291 	wait_prop.sob_mask = 0x1;
1292 	wait_prop.sob_val = cs_cmpl->sob_val;
1293 	wait_prop.mon_id = prop->collective_slave_mon_id;
1294 	wait_prop.q_idx = queue_id;
1295 	wait_prop.size = cb_size;
1296 
1297 	dev_dbg(hdev->dev,
1298 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1299 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1300 		prop->collective_slave_mon_id, queue_id);
1301 
1302 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1303 
1304 	dev_dbg(hdev->dev,
1305 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1306 		prop->collective_sob_id, queue_id);
1307 
1308 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1309 			prop->collective_sob_id, cb_size, false);
1310 }
1311 
1312 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1313 {
1314 	struct hl_cs_compl *signal_cs_cmpl =
1315 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1316 	struct hl_cs_compl *cs_cmpl =
1317 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1318 	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1319 	struct gaudi_collective_properties *cprop;
1320 	u32 stream, queue_id, sob_group_offset;
1321 	struct gaudi_device *gaudi;
1322 	struct hl_device *hdev;
1323 	struct hl_cs_job *job;
1324 	struct hl_ctx *ctx;
1325 
1326 	ctx = cs->ctx;
1327 	hdev = ctx->hdev;
1328 	gaudi = hdev->asic_specific;
1329 	cprop = &gaudi->collective_props;
1330 
1331 	if (cs->encaps_signals) {
1332 		cs_cmpl->hw_sob = handle->hw_sob;
1333 		/* at this checkpoint we only need the hw_sob pointer
1334 		 * for the completion check before start going over the jobs
1335 		 * of the master/slaves, the sob_value will be taken later on
1336 		 * in gaudi_collective_slave_init_job depends on each
1337 		 * job wait offset value.
1338 		 */
1339 		cs_cmpl->sob_val = 0;
1340 	} else {
1341 		/* copy the SOB id and value of the signal CS */
1342 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1343 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1344 	}
1345 
1346 	/* check again if the signal cs already completed.
1347 	 * if yes then don't send any wait cs since the hw_sob
1348 	 * could be in reset already. if signal is not completed
1349 	 * then get refcount to hw_sob to prevent resetting the sob
1350 	 * while wait cs is not submitted.
1351 	 * note that this check is protected by two locks,
1352 	 * hw queue lock and completion object lock,
1353 	 * and the same completion object lock also protects
1354 	 * the hw_sob reset handler function.
1355 	 * The hw_queue lock prevent out of sync of hw_sob
1356 	 * refcount value, changed by signal/wait flows.
1357 	 */
1358 	spin_lock(&signal_cs_cmpl->lock);
1359 
1360 	if (completion_done(&cs->signal_fence->completion)) {
1361 		spin_unlock(&signal_cs_cmpl->lock);
1362 		return -EINVAL;
1363 	}
1364 	/* Increment kref since all slave queues are now waiting on it */
1365 	kref_get(&cs_cmpl->hw_sob->kref);
1366 
1367 	spin_unlock(&signal_cs_cmpl->lock);
1368 
1369 	/* Calculate the stream from collective master queue (1st job) */
1370 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1371 	stream = job->hw_queue_id % 4;
1372 	sob_group_offset =
1373 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1374 
1375 	list_for_each_entry(job, &cs->job_list, cs_node) {
1376 		queue_id = job->hw_queue_id;
1377 
1378 		if (hdev->kernel_queues[queue_id].collective_mode ==
1379 				HL_COLLECTIVE_MASTER)
1380 			gaudi_collective_master_init_job(hdev, job, stream,
1381 						sob_group_offset);
1382 		else
1383 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1384 	}
1385 
1386 	cs_cmpl->sob_group = sob_group_offset;
1387 
1388 	/* Handle sob group kref and wraparound */
1389 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1390 	cprop->next_sob_group_val[stream]++;
1391 
1392 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1393 		/*
1394 		 * Decrement as we reached the max value.
1395 		 * The release function won't be called here as we've
1396 		 * just incremented the refcount.
1397 		 */
1398 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1399 				gaudi_sob_group_reset_error);
1400 		cprop->next_sob_group_val[stream] = 1;
1401 		/* only two SOBs are currently in use */
1402 		cprop->curr_sob_group_idx[stream] =
1403 			(cprop->curr_sob_group_idx[stream] + 1) &
1404 							(HL_RSVD_SOBS - 1);
1405 
1406 		gaudi_collective_map_sobs(hdev, stream);
1407 
1408 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1409 				cprop->curr_sob_group_idx[stream], stream);
1410 	}
1411 
1412 	mb();
1413 	hl_fence_put(cs->signal_fence);
1414 	cs->signal_fence = NULL;
1415 
1416 	return 0;
1417 }
1418 
1419 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1420 {
1421 	u32 cacheline_end, additional_commands;
1422 
1423 	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1424 	additional_commands = sizeof(struct packet_msg_prot) * 2;
1425 
1426 	if (user_cb_size + additional_commands > cacheline_end)
1427 		return cacheline_end - user_cb_size + additional_commands;
1428 	else
1429 		return additional_commands;
1430 }
1431 
1432 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1433 		struct hl_ctx *ctx, struct hl_cs *cs,
1434 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1435 		u32 encaps_signal_offset)
1436 {
1437 	struct hw_queue_properties *hw_queue_prop;
1438 	struct hl_cs_counters_atomic *cntr;
1439 	struct hl_cs_job *job;
1440 	struct hl_cb *cb;
1441 	u32 cb_size;
1442 	bool patched_cb;
1443 
1444 	cntr = &hdev->aggregated_cs_counters;
1445 
1446 	if (mode == HL_COLLECTIVE_MASTER) {
1447 		/* CB size of collective master queue contains
1448 		 * 4 msg short packets for monitor 1 configuration
1449 		 * 1 fence packet
1450 		 * 4 msg short packets for monitor 2 configuration
1451 		 * 1 fence packet
1452 		 * 2 msg prot packets for completion and MSI
1453 		 */
1454 		cb_size = sizeof(struct packet_msg_short) * 8 +
1455 				sizeof(struct packet_fence) * 2 +
1456 				sizeof(struct packet_msg_prot) * 2;
1457 		patched_cb = true;
1458 	} else {
1459 		/* CB size of collective slave queues contains
1460 		 * 4 msg short packets for monitor configuration
1461 		 * 1 fence packet
1462 		 * 1 additional msg short packet for sob signal
1463 		 */
1464 		cb_size = sizeof(struct packet_msg_short) * 5 +
1465 				sizeof(struct packet_fence);
1466 		patched_cb = false;
1467 	}
1468 
1469 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1470 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1471 	if (!job) {
1472 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1473 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1474 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1475 		return -ENOMEM;
1476 	}
1477 
1478 	/* Allocate internal mapped CB for non patched CBs */
1479 	cb = hl_cb_kernel_create(hdev, cb_size,
1480 			hdev->mmu_enable && !patched_cb);
1481 	if (!cb) {
1482 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1483 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1484 		kfree(job);
1485 		return -EFAULT;
1486 	}
1487 
1488 	job->id = 0;
1489 	job->cs = cs;
1490 	job->user_cb = cb;
1491 	atomic_inc(&job->user_cb->cs_cnt);
1492 	job->user_cb_size = cb_size;
1493 	job->hw_queue_id = queue_id;
1494 
1495 	/* since its guaranteed to have only one chunk in the collective wait
1496 	 * cs, we can use this chunk to set the encapsulated signal offset
1497 	 * in the jobs.
1498 	 */
1499 	if (cs->encaps_signals)
1500 		job->encaps_sig_wait_offset = encaps_signal_offset;
1501 
1502 	/*
1503 	 * No need in parsing, user CB is the patched CB.
1504 	 * We call hl_cb_destroy() out of two reasons - we don't need
1505 	 * the CB in the CB idr anymore and to decrement its refcount as
1506 	 * it was incremented inside hl_cb_kernel_create().
1507 	 */
1508 	if (patched_cb)
1509 		job->patched_cb = job->user_cb;
1510 	else
1511 		job->patched_cb = NULL;
1512 
1513 	job->job_cb_size = job->user_cb_size;
1514 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1515 
1516 	/* increment refcount as for external queues we get completion */
1517 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1518 		cs_get(cs);
1519 
1520 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1521 
1522 	list_add_tail(&job->cs_node, &cs->job_list);
1523 
1524 	hl_debugfs_add_job(hdev, job);
1525 
1526 	return 0;
1527 }
1528 
1529 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1530 		struct hl_ctx *ctx, struct hl_cs *cs,
1531 		u32 wait_queue_id, u32 collective_engine_id,
1532 		u32 encaps_signal_offset)
1533 {
1534 	struct gaudi_device *gaudi = hdev->asic_specific;
1535 	struct hw_queue_properties *hw_queue_prop;
1536 	u32 queue_id, collective_queue, num_jobs;
1537 	u32 stream, nic_queue, nic_idx = 0;
1538 	bool skip;
1539 	int i, rc = 0;
1540 
1541 	/* Verify wait queue id is configured as master */
1542 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1543 	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1544 		dev_err(hdev->dev,
1545 			"Queue %d is not configured as collective master\n",
1546 			wait_queue_id);
1547 		return -EINVAL;
1548 	}
1549 
1550 	/* Verify engine id is supported */
1551 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1552 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1553 		dev_err(hdev->dev,
1554 			"Collective wait does not support engine %u\n",
1555 			collective_engine_id);
1556 		return -EINVAL;
1557 	}
1558 
1559 	stream = wait_queue_id % 4;
1560 
1561 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1562 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1563 	else
1564 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1565 
1566 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1567 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1568 
1569 	/* First job goes to the collective master queue, it will wait for
1570 	 * the collective slave queues to finish execution.
1571 	 * The synchronization is done using two monitors:
1572 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1573 	 * reduction engine (DMA5/TPC7).
1574 	 *
1575 	 * Rest of the jobs goes to the collective slave queues which will
1576 	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1577 	 */
1578 	for (i = 0 ; i < num_jobs ; i++) {
1579 		if (i == 0) {
1580 			queue_id = wait_queue_id;
1581 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1582 				HL_COLLECTIVE_MASTER, queue_id,
1583 				wait_queue_id, encaps_signal_offset);
1584 		} else {
1585 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1586 				if (gaudi->hw_cap_initialized &
1587 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1588 					skip = false;
1589 				else
1590 					skip = true;
1591 
1592 				queue_id = nic_queue;
1593 				nic_queue += 4;
1594 				nic_idx++;
1595 
1596 				if (skip)
1597 					continue;
1598 			} else {
1599 				queue_id = collective_queue;
1600 			}
1601 
1602 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1603 				HL_COLLECTIVE_SLAVE, queue_id,
1604 				wait_queue_id, encaps_signal_offset);
1605 		}
1606 
1607 		if (rc)
1608 			return rc;
1609 	}
1610 
1611 	return rc;
1612 }
1613 
1614 static int gaudi_late_init(struct hl_device *hdev)
1615 {
1616 	struct gaudi_device *gaudi = hdev->asic_specific;
1617 	int rc;
1618 
1619 	rc = gaudi->cpucp_info_get(hdev);
1620 	if (rc) {
1621 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1622 		return rc;
1623 	}
1624 
1625 	if ((hdev->card_type == cpucp_card_type_pci) &&
1626 			(hdev->nic_ports_mask & 0x3)) {
1627 		dev_info(hdev->dev,
1628 			"PCI card detected, only 8 ports are enabled\n");
1629 		hdev->nic_ports_mask &= ~0x3;
1630 
1631 		/* Stop and disable unused NIC QMANs */
1632 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1633 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1634 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1635 
1636 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1637 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1638 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1639 
1640 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1641 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1642 
1643 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1644 	}
1645 
1646 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1647 	if (rc) {
1648 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1649 		return rc;
1650 	}
1651 
1652 	/* Scrub both SRAM and DRAM */
1653 	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1654 	if (rc)
1655 		goto disable_pci_access;
1656 
1657 	rc = gaudi_fetch_psoc_frequency(hdev);
1658 	if (rc) {
1659 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1660 		goto disable_pci_access;
1661 	}
1662 
1663 	rc = gaudi_mmu_clear_pgt_range(hdev);
1664 	if (rc) {
1665 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1666 		goto disable_pci_access;
1667 	}
1668 
1669 	rc = gaudi_init_tpc_mem(hdev);
1670 	if (rc) {
1671 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1672 		goto disable_pci_access;
1673 	}
1674 
1675 	rc = gaudi_collective_init(hdev);
1676 	if (rc) {
1677 		dev_err(hdev->dev, "Failed to init collective\n");
1678 		goto disable_pci_access;
1679 	}
1680 
1681 	/* We only support a single ASID for the user, so for the sake of optimization, just
1682 	 * initialize the ASID one time during device initialization with the fixed value of 1
1683 	 */
1684 	gaudi_mmu_prepare(hdev, 1);
1685 
1686 	hl_fw_set_pll_profile(hdev);
1687 
1688 	return 0;
1689 
1690 disable_pci_access:
1691 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1692 
1693 	return rc;
1694 }
1695 
/* Late teardown: the only step is releasing the hwmon resources */
static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
1700 
1701 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1702 {
1703 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1704 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1705 	int i, j, rc = 0;
1706 
1707 	/*
1708 	 * The device CPU works with 40-bits addresses, while bit 39 must be set
1709 	 * to '1' when accessing the host.
1710 	 * Bits 49:39 of the full host address are saved for a later
1711 	 * configuration of the HW to perform extension to 50 bits.
1712 	 * Because there is a single HW register that holds the extension bits,
1713 	 * these bits must be identical in all allocated range.
1714 	 */
1715 
1716 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1717 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1718 								&dma_addr_arr[i],
1719 								GFP_KERNEL | __GFP_ZERO);
1720 		if (!virt_addr_arr[i]) {
1721 			rc = -ENOMEM;
1722 			goto free_dma_mem_arr;
1723 		}
1724 
1725 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1726 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1727 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1728 			break;
1729 	}
1730 
1731 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1732 		dev_err(hdev->dev,
1733 			"MSB of CPU accessible DMA memory are not identical in all range\n");
1734 		rc = -EFAULT;
1735 		goto free_dma_mem_arr;
1736 	}
1737 
1738 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1739 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1740 	hdev->cpu_pci_msb_addr =
1741 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1742 
1743 	if (!hdev->asic_prop.fw_security_enabled)
1744 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1745 
1746 free_dma_mem_arr:
1747 	for (j = 0 ; j < i ; j++)
1748 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1749 						dma_addr_arr[j]);
1750 
1751 	return rc;
1752 }
1753 
1754 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1755 {
1756 	struct gaudi_device *gaudi = hdev->asic_specific;
1757 	struct gaudi_internal_qman_info *q;
1758 	u32 i;
1759 
1760 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1761 		q = &gaudi->internal_qmans[i];
1762 		if (!q->pq_kernel_addr)
1763 			continue;
1764 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1765 	}
1766 }
1767 
1768 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1769 {
1770 	struct gaudi_device *gaudi = hdev->asic_specific;
1771 	struct gaudi_internal_qman_info *q;
1772 	int rc, i;
1773 
1774 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1775 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1776 			continue;
1777 
1778 		q = &gaudi->internal_qmans[i];
1779 
1780 		switch (i) {
1781 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1782 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1783 			break;
1784 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1785 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1786 			break;
1787 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1788 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1789 			break;
1790 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1791 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1792 			break;
1793 		default:
1794 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1795 			rc = -EINVAL;
1796 			goto free_internal_qmans_pq_mem;
1797 		}
1798 
1799 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1800 								GFP_KERNEL | __GFP_ZERO);
1801 		if (!q->pq_kernel_addr) {
1802 			rc = -ENOMEM;
1803 			goto free_internal_qmans_pq_mem;
1804 		}
1805 	}
1806 
1807 	return 0;
1808 
1809 free_internal_qmans_pq_mem:
1810 	gaudi_free_internal_qmans_pq_mem(hdev);
1811 	return rc;
1812 }
1813 
1814 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1815 {
1816 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1817 	struct pci_mem_region *region;
1818 
1819 	/* CFG */
1820 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1821 	region->region_base = CFG_BASE;
1822 	region->region_size = CFG_SIZE;
1823 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1824 	region->bar_size = CFG_BAR_SIZE;
1825 	region->bar_id = CFG_BAR_ID;
1826 	region->used = 1;
1827 
1828 	/* SRAM */
1829 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1830 	region->region_base = SRAM_BASE_ADDR;
1831 	region->region_size = SRAM_SIZE;
1832 	region->offset_in_bar = 0;
1833 	region->bar_size = SRAM_BAR_SIZE;
1834 	region->bar_id = SRAM_BAR_ID;
1835 	region->used = 1;
1836 
1837 	/* DRAM */
1838 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1839 	region->region_base = DRAM_PHYS_BASE;
1840 	region->region_size = hdev->asic_prop.dram_size;
1841 	region->offset_in_bar = 0;
1842 	region->bar_size = prop->dram_pci_bar_size;
1843 	region->bar_id = HBM_BAR_ID;
1844 	region->used = 1;
1845 
1846 	/* SP SRAM */
1847 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1848 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1849 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1850 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1851 	region->bar_size = CFG_BAR_SIZE;
1852 	region->bar_id = CFG_BAR_ID;
1853 	region->used = 1;
1854 }
1855 
1856 static int gaudi_sw_init(struct hl_device *hdev)
1857 {
1858 	struct gaudi_device *gaudi;
1859 	u32 i, event_id = 0;
1860 	int rc;
1861 
1862 	/* Allocate device structure */
1863 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1864 	if (!gaudi)
1865 		return -ENOMEM;
1866 
1867 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1868 		if (gaudi_irq_map_table[i].valid) {
1869 			if (event_id == GAUDI_EVENT_SIZE) {
1870 				dev_err(hdev->dev,
1871 					"Event array exceeds the limit of %u events\n",
1872 					GAUDI_EVENT_SIZE);
1873 				rc = -EINVAL;
1874 				goto free_gaudi_device;
1875 			}
1876 
1877 			gaudi->events[event_id++] =
1878 					gaudi_irq_map_table[i].fc_id;
1879 		}
1880 	}
1881 
1882 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1883 
1884 	hdev->asic_specific = gaudi;
1885 
1886 	/* Create DMA pool for small allocations */
1887 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1888 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1889 	if (!hdev->dma_pool) {
1890 		dev_err(hdev->dev, "failed to create DMA pool\n");
1891 		rc = -ENOMEM;
1892 		goto free_gaudi_device;
1893 	}
1894 
1895 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1896 	if (rc)
1897 		goto free_dma_pool;
1898 
1899 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1900 	if (!hdev->cpu_accessible_dma_pool) {
1901 		dev_err(hdev->dev,
1902 			"Failed to create CPU accessible DMA pool\n");
1903 		rc = -ENOMEM;
1904 		goto free_cpu_dma_mem;
1905 	}
1906 
1907 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1908 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1909 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1910 	if (rc) {
1911 		dev_err(hdev->dev,
1912 			"Failed to add memory to CPU accessible DMA pool\n");
1913 		rc = -EFAULT;
1914 		goto free_cpu_accessible_dma_pool;
1915 	}
1916 
1917 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1918 	if (rc)
1919 		goto free_cpu_accessible_dma_pool;
1920 
1921 	spin_lock_init(&gaudi->hw_queues_lock);
1922 
1923 	hdev->supports_sync_stream = true;
1924 	hdev->supports_coresight = true;
1925 	hdev->supports_staged_submission = true;
1926 	hdev->supports_wait_for_multi_cs = true;
1927 
1928 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1929 	hdev->stream_master_qid_arr =
1930 				hdev->asic_funcs->get_stream_master_qid_arr();
1931 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1932 
1933 	return 0;
1934 
1935 free_cpu_accessible_dma_pool:
1936 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1937 free_cpu_dma_mem:
1938 	if (!hdev->asic_prop.fw_security_enabled)
1939 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1940 					hdev->cpu_pci_msb_addr);
1941 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1942 					hdev->cpu_accessible_dma_address);
1943 free_dma_pool:
1944 	dma_pool_destroy(hdev->dma_pool);
1945 free_gaudi_device:
1946 	kfree(gaudi);
1947 	return rc;
1948 }
1949 
1950 static int gaudi_sw_fini(struct hl_device *hdev)
1951 {
1952 	struct gaudi_device *gaudi = hdev->asic_specific;
1953 
1954 	gaudi_free_internal_qmans_pq_mem(hdev);
1955 
1956 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1957 
1958 	if (!hdev->asic_prop.fw_security_enabled)
1959 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1960 					hdev->cpu_pci_msb_addr);
1961 
1962 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1963 					hdev->cpu_accessible_dma_address);
1964 
1965 	dma_pool_destroy(hdev->dma_pool);
1966 
1967 	kfree(gaudi);
1968 
1969 	return 0;
1970 }
1971 
1972 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1973 {
1974 	struct hl_device *hdev = arg;
1975 	int i;
1976 
1977 	if (hdev->disabled)
1978 		return IRQ_HANDLED;
1979 
1980 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1981 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1982 
1983 	hl_irq_handler_eq(irq, &hdev->event_queue);
1984 
1985 	return IRQ_HANDLED;
1986 }
1987 
1988 /*
1989  * For backward compatibility, new MSI interrupts should be set after the
1990  * existing CPU and NIC interrupts.
1991  */
1992 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1993 				bool cpu_eq)
1994 {
1995 	int msi_vec;
1996 
1997 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1998 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1999 				GAUDI_EVENT_QUEUE_MSI_IDX);
2000 
2001 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
2002 			(nr + NIC_NUMBER_OF_ENGINES + 1);
2003 
2004 	return pci_irq_vector(hdev->pdev, msi_vec);
2005 }
2006 
2007 static int gaudi_enable_msi_single(struct hl_device *hdev)
2008 {
2009 	int rc, irq;
2010 
2011 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2012 
2013 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2014 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2015 			"gaudi single msi", hdev);
2016 	if (rc)
2017 		dev_err(hdev->dev,
2018 			"Failed to request single MSI IRQ\n");
2019 
2020 	return rc;
2021 }
2022 
2023 static int gaudi_enable_msi(struct hl_device *hdev)
2024 {
2025 	struct gaudi_device *gaudi = hdev->asic_specific;
2026 	int rc;
2027 
2028 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2029 		return 0;
2030 
2031 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2032 	if (rc < 0) {
2033 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2034 		return rc;
2035 	}
2036 
2037 	rc = gaudi_enable_msi_single(hdev);
2038 	if (rc)
2039 		goto free_pci_irq_vectors;
2040 
2041 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2042 
2043 	return 0;
2044 
2045 free_pci_irq_vectors:
2046 	pci_free_irq_vectors(hdev->pdev);
2047 	return rc;
2048 }
2049 
2050 static void gaudi_sync_irqs(struct hl_device *hdev)
2051 {
2052 	struct gaudi_device *gaudi = hdev->asic_specific;
2053 
2054 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2055 		return;
2056 
2057 	/* Wait for all pending IRQs to be finished */
2058 	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2059 }
2060 
2061 static void gaudi_disable_msi(struct hl_device *hdev)
2062 {
2063 	struct gaudi_device *gaudi = hdev->asic_specific;
2064 
2065 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2066 		return;
2067 
2068 	gaudi_sync_irqs(hdev);
2069 	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2070 	pci_free_irq_vectors(hdev->pdev);
2071 
2072 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2073 }
2074 
2075 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2076 {
2077 	struct gaudi_device *gaudi = hdev->asic_specific;
2078 
2079 	if (hdev->asic_prop.fw_security_enabled)
2080 		return;
2081 
2082 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2083 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2084 		return;
2085 
2086 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2087 		return;
2088 
2089 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2090 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2091 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2092 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2093 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2094 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2095 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2096 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2097 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2098 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2099 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2100 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2101 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2102 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2103 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2104 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2105 
2106 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2107 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2108 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2109 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2110 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2111 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2112 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2113 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2114 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2115 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2116 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2117 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2118 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2119 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2120 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2121 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2122 
2123 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2124 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2125 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2126 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2127 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2128 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2129 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2130 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2131 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2132 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2133 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2134 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2135 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2136 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2137 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2138 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2139 
2140 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2141 }
2142 
2143 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2144 {
2145 	struct gaudi_device *gaudi = hdev->asic_specific;
2146 
2147 	if (hdev->asic_prop.fw_security_enabled)
2148 		return;
2149 
2150 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2151 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2152 		return;
2153 
2154 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2155 		return;
2156 
2157 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2158 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2159 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2160 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2161 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2162 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2163 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2164 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2165 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2166 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2167 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2168 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2169 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2170 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2171 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2172 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2173 
2174 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2175 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2176 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2177 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2178 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2179 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2180 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2181 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2182 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2183 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2184 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2185 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2186 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2187 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2188 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2189 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2190 
2191 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2192 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2193 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2194 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2195 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2196 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2197 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2198 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2199 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2200 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2201 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2202 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2203 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2204 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2205 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2206 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2207 
2208 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2209 }
2210 
2211 static void gaudi_init_e2e(struct hl_device *hdev)
2212 {
2213 	if (hdev->asic_prop.fw_security_enabled)
2214 		return;
2215 
2216 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2217 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2218 		return;
2219 
2220 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2221 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2222 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2223 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2224 
2225 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2226 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2227 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2228 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2229 
2230 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2231 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2232 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2233 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2234 
2235 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2236 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2237 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2238 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2239 
2240 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2241 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2242 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2243 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2244 
2245 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2246 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2247 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2248 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2249 
2250 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2251 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2252 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2253 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2254 
2255 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2256 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2257 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2258 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2259 
2260 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2261 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2262 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2263 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2264 
2265 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2266 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2267 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2268 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2269 
2270 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2271 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2272 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2273 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2274 
2275 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2276 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2277 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2278 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2279 
2280 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2281 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2282 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2283 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2284 
2285 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2286 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2287 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2288 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2289 
2290 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2291 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2292 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2293 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2294 
2295 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2296 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2297 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2298 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2299 
2300 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2301 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2302 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2303 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2304 
2305 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2306 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2307 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2308 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2309 
2310 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2311 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2312 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2313 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2314 
2315 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2316 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2317 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2318 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2319 
2320 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2321 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2322 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2323 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2324 
2325 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2326 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2327 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2328 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2329 
2330 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2331 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2332 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2333 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2334 
2335 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2336 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2337 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2338 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2339 
2340 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2341 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2342 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2343 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2344 
2345 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2346 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2347 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2348 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2349 
2350 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2351 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2352 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2353 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2354 
2355 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2356 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2357 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2358 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2359 
2360 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2361 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2362 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2363 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2364 
2365 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2366 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2367 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2368 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2369 
2370 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2371 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2372 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2373 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2374 
2375 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2376 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2377 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2378 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2379 
2380 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2381 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2382 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2383 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2384 
2385 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2386 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2387 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2388 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2389 
2390 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2391 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2392 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2393 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2394 
2395 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2396 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2397 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2398 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2399 
2400 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2401 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2402 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2403 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2404 
2405 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2406 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2407 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2408 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2409 
2410 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2411 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2412 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2413 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2414 
2415 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2416 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2417 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2418 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2419 
2420 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2421 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2422 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2423 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2424 
2425 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2426 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2427 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2428 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2429 
2430 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2431 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2432 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2433 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2434 
2435 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2436 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2437 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2438 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2439 
2440 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2441 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2442 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2443 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2444 
2445 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2446 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2447 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2448 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2449 
2450 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2451 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2452 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2453 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2454 
2455 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2456 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2457 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2458 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2459 }
2460 
2461 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2462 {
2463 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2464 
2465 	if (hdev->asic_prop.fw_security_enabled)
2466 		return;
2467 
2468 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2469 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2470 		return;
2471 
2472 	hbm0_wr = 0x33333333;
2473 	hbm0_rd = 0x77777777;
2474 	hbm1_wr = 0x55555555;
2475 	hbm1_rd = 0xDDDDDDDD;
2476 
2477 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2478 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2479 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2480 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2481 
2482 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2483 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2484 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2485 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2486 
2487 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2488 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2489 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2490 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2491 
2492 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2493 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2494 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2495 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2496 
2497 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2498 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2499 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2500 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2501 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2502 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2503 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2504 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2505 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2506 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2507 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2508 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2509 
2510 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2511 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2512 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2513 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2514 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2515 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2516 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2517 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2518 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2519 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2520 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2521 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2522 }
2523 
2524 static void gaudi_init_golden_registers(struct hl_device *hdev)
2525 {
2526 	u32 tpc_offset;
2527 	int tpc_id, i;
2528 
2529 	gaudi_init_e2e(hdev);
2530 	gaudi_init_hbm_cred(hdev);
2531 
2532 	for (tpc_id = 0, tpc_offset = 0;
2533 				tpc_id < TPC_NUMBER_OF_ENGINES;
2534 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2535 		/* Mask all arithmetic interrupts from TPC */
2536 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2537 		/* Set 16 cache lines */
2538 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2539 				ICACHE_FETCH_LINE_NUM, 2);
2540 	}
2541 
2542 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2543 	for (i = 0 ; i < 128 ; i += 8)
2544 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2545 
2546 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2547 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2548 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2549 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2550 }
2551 
2552 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2553 					int qman_id, dma_addr_t qman_pq_addr)
2554 {
2555 	struct cpu_dyn_regs *dyn_regs =
2556 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2557 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2558 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2559 	u32 q_off, dma_qm_offset;
2560 	u32 dma_qm_err_cfg, irq_handler_offset;
2561 
2562 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2563 
2564 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2565 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2566 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2567 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2568 	so_base_en_lo = lower_32_bits(CFG_BASE +
2569 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2570 	so_base_en_hi = upper_32_bits(CFG_BASE +
2571 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2572 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2573 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2574 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2575 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2576 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2577 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2578 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2579 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2580 
2581 	q_off = dma_qm_offset + qman_id * 4;
2582 
2583 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2584 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2585 
2586 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2587 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2588 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2589 
2590 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2591 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2592 							QMAN_LDMA_SRC_OFFSET);
2593 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2594 							QMAN_LDMA_DST_OFFSET);
2595 
2596 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2597 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2598 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2599 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2600 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2601 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2602 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2603 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2604 
2605 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2606 
2607 	/* The following configuration is needed only once per QMAN */
2608 	if (qman_id == 0) {
2609 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2610 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2611 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2612 
2613 		/* Configure RAZWI IRQ */
2614 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2615 		if (hdev->stop_on_err)
2616 			dma_qm_err_cfg |=
2617 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2618 
2619 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2620 
2621 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2622 			lower_32_bits(CFG_BASE + irq_handler_offset));
2623 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2624 			upper_32_bits(CFG_BASE + irq_handler_offset));
2625 
2626 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2627 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2628 									dma_id);
2629 
2630 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2631 				QM_ARB_ERR_MSG_EN_MASK);
2632 
2633 		/* Set timeout to maximum */
2634 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2635 
2636 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2637 				QMAN_EXTERNAL_MAKE_TRUSTED);
2638 
2639 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2640 	}
2641 }
2642 
2643 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2644 {
2645 	struct cpu_dyn_regs *dyn_regs =
2646 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2647 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2648 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2649 	u32 irq_handler_offset;
2650 
2651 	/* Set to maximum possible according to physical size */
2652 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2653 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2654 
2655 	/* WA for H/W bug H3-2116 */
2656 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2657 
2658 	/* STOP_ON bit implies no completion to operation in case of RAZWI */
2659 	if (hdev->stop_on_err)
2660 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2661 
2662 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2663 
2664 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2665 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2666 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2667 
2668 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2669 		lower_32_bits(CFG_BASE + irq_handler_offset));
2670 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2671 		upper_32_bits(CFG_BASE + irq_handler_offset));
2672 
2673 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2674 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2675 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2676 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2677 	/* If the channel is secured, it should be in MMU bypass mode */
2678 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2679 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2680 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2681 }
2682 
2683 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2684 				u32 enable_mask)
2685 {
2686 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2687 
2688 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2689 }
2690 
2691 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2692 {
2693 	struct gaudi_device *gaudi = hdev->asic_specific;
2694 	struct hl_hw_queue *q;
2695 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2696 
2697 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2698 		return;
2699 
2700 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2701 		dma_id = gaudi_dma_assignment[i];
2702 		/*
2703 		 * For queues after the CPU Q need to add 1 to get the correct
2704 		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
2705 		 * order to get the correct MSI register.
2706 		 */
2707 		if (dma_id > 1) {
2708 			cpu_skip = 1;
2709 			nic_skip = NIC_NUMBER_OF_ENGINES;
2710 		} else {
2711 			cpu_skip = 0;
2712 			nic_skip = 0;
2713 		}
2714 
2715 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2716 			q_idx = 4 * dma_id + j + cpu_skip;
2717 			q = &hdev->kernel_queues[q_idx];
2718 			q->cq_id = cq_id++;
2719 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2720 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2721 						q->bus_address);
2722 		}
2723 
2724 		gaudi_init_dma_core(hdev, dma_id);
2725 
2726 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2727 	}
2728 
2729 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2730 }
2731 
2732 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2733 					int qman_id, u64 qman_base_addr)
2734 {
2735 	struct cpu_dyn_regs *dyn_regs =
2736 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2737 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2738 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2739 	u32 dma_qm_err_cfg, irq_handler_offset;
2740 	u32 q_off, dma_qm_offset;
2741 
2742 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2743 
2744 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2745 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2746 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2747 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2748 	so_base_en_lo = lower_32_bits(CFG_BASE +
2749 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2750 	so_base_en_hi = upper_32_bits(CFG_BASE +
2751 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2752 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2753 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2754 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2755 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2756 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2757 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2758 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2759 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2760 
2761 	q_off = dma_qm_offset + qman_id * 4;
2762 
2763 	if (qman_id < 4) {
2764 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2765 					lower_32_bits(qman_base_addr));
2766 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2767 					upper_32_bits(qman_base_addr));
2768 
2769 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2770 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2771 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2772 
2773 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2774 							QMAN_CPDMA_SIZE_OFFSET);
2775 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2776 							QMAN_CPDMA_SRC_OFFSET);
2777 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2778 							QMAN_CPDMA_DST_OFFSET);
2779 	} else {
2780 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2781 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2782 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2783 
2784 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2785 							QMAN_LDMA_SIZE_OFFSET);
2786 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2787 							QMAN_LDMA_SRC_OFFSET);
2788 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2789 							QMAN_LDMA_DST_OFFSET);
2790 
2791 		/* Configure RAZWI IRQ */
2792 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2793 		if (hdev->stop_on_err)
2794 			dma_qm_err_cfg |=
2795 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2796 
2797 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2798 
2799 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2800 			lower_32_bits(CFG_BASE + irq_handler_offset));
2801 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2802 			upper_32_bits(CFG_BASE + irq_handler_offset));
2803 
2804 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2805 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2806 									dma_id);
2807 
2808 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2809 				QM_ARB_ERR_MSG_EN_MASK);
2810 
2811 		/* Set timeout to maximum */
2812 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2813 
2814 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2815 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2816 				QMAN_INTERNAL_MAKE_TRUSTED);
2817 	}
2818 
2819 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2820 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2821 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2822 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2823 
2824 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2825 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2826 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2827 				mtr_base_ws_lo);
2828 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2829 				mtr_base_ws_hi);
2830 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2831 				so_base_ws_lo);
2832 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2833 				so_base_ws_hi);
2834 	}
2835 }
2836 
2837 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2838 {
2839 	struct gaudi_device *gaudi = hdev->asic_specific;
2840 	struct gaudi_internal_qman_info *q;
2841 	u64 qman_base_addr;
2842 	int i, j, dma_id, internal_q_index;
2843 
2844 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2845 		return;
2846 
2847 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2848 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2849 
2850 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2851 			 /*
2852 			  * Add the CPU queue in order to get the correct queue
2853 			  * number as all internal queue are placed after it
2854 			  */
2855 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2856 
2857 			q = &gaudi->internal_qmans[internal_q_index];
2858 			qman_base_addr = (u64) q->pq_dma_addr;
2859 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2860 						qman_base_addr);
2861 		}
2862 
2863 		/* Initializing lower CP for HBM DMA QMAN */
2864 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2865 
2866 		gaudi_init_dma_core(hdev, dma_id);
2867 
2868 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2869 	}
2870 
2871 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2872 }
2873 
2874 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2875 					int qman_id, u64 qman_base_addr)
2876 {
2877 	struct cpu_dyn_regs *dyn_regs =
2878 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2879 	u32 mtr_base_lo, mtr_base_hi;
2880 	u32 so_base_lo, so_base_hi;
2881 	u32 irq_handler_offset;
2882 	u32 q_off, mme_id;
2883 	u32 mme_qm_err_cfg;
2884 
2885 	mtr_base_lo = lower_32_bits(CFG_BASE +
2886 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2887 	mtr_base_hi = upper_32_bits(CFG_BASE +
2888 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2889 	so_base_lo = lower_32_bits(CFG_BASE +
2890 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2891 	so_base_hi = upper_32_bits(CFG_BASE +
2892 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2893 
2894 	q_off = mme_offset + qman_id * 4;
2895 
2896 	if (qman_id < 4) {
2897 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2898 					lower_32_bits(qman_base_addr));
2899 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2900 					upper_32_bits(qman_base_addr));
2901 
2902 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2903 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2904 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2905 
2906 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2907 							QMAN_CPDMA_SIZE_OFFSET);
2908 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2909 							QMAN_CPDMA_SRC_OFFSET);
2910 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2911 							QMAN_CPDMA_DST_OFFSET);
2912 	} else {
2913 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2914 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2915 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2916 
2917 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2918 							QMAN_LDMA_SIZE_OFFSET);
2919 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2920 							QMAN_LDMA_SRC_OFFSET);
2921 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2922 							QMAN_LDMA_DST_OFFSET);
2923 
2924 		/* Configure RAZWI IRQ */
2925 		mme_id = mme_offset /
2926 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2927 
2928 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2929 		if (hdev->stop_on_err)
2930 			mme_qm_err_cfg |=
2931 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2932 
2933 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2934 
2935 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2936 			lower_32_bits(CFG_BASE + irq_handler_offset));
2937 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2938 			upper_32_bits(CFG_BASE + irq_handler_offset));
2939 
2940 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2941 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2942 									mme_id);
2943 
2944 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2945 				QM_ARB_ERR_MSG_EN_MASK);
2946 
2947 		/* Set timeout to maximum */
2948 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2949 
2950 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2951 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2952 				QMAN_INTERNAL_MAKE_TRUSTED);
2953 	}
2954 
2955 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2956 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2957 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2958 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2959 }
2960 
2961 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2962 {
2963 	struct gaudi_device *gaudi = hdev->asic_specific;
2964 	struct gaudi_internal_qman_info *q;
2965 	u64 qman_base_addr;
2966 	u32 mme_offset;
2967 	int i, internal_q_index;
2968 
2969 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2970 		return;
2971 
2972 	/*
2973 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2974 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2975 	 */
2976 
2977 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2978 
2979 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2980 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2981 		q = &gaudi->internal_qmans[internal_q_index];
2982 		qman_base_addr = (u64) q->pq_dma_addr;
2983 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2984 					qman_base_addr);
2985 		if (i == 3)
2986 			mme_offset = 0;
2987 	}
2988 
2989 	/* Initializing lower CP for MME QMANs */
2990 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2991 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2992 	gaudi_init_mme_qman(hdev, 0, 4, 0);
2993 
2994 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2995 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2996 
2997 	gaudi->hw_cap_initialized |= HW_CAP_MME;
2998 }
2999 
3000 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3001 				int qman_id, u64 qman_base_addr)
3002 {
3003 	struct cpu_dyn_regs *dyn_regs =
3004 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3005 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3006 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3007 	u32 tpc_qm_err_cfg, irq_handler_offset;
3008 	u32 q_off, tpc_id;
3009 
3010 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3011 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3012 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3013 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3014 	so_base_en_lo = lower_32_bits(CFG_BASE +
3015 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3016 	so_base_en_hi = upper_32_bits(CFG_BASE +
3017 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3018 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3019 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3020 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3021 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3022 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3023 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3024 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3025 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3026 
3027 	q_off = tpc_offset + qman_id * 4;
3028 
3029 	tpc_id = tpc_offset /
3030 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3031 
3032 	if (qman_id < 4) {
3033 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3034 					lower_32_bits(qman_base_addr));
3035 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3036 					upper_32_bits(qman_base_addr));
3037 
3038 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3039 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3040 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3041 
3042 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3043 							QMAN_CPDMA_SIZE_OFFSET);
3044 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3045 							QMAN_CPDMA_SRC_OFFSET);
3046 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3047 							QMAN_CPDMA_DST_OFFSET);
3048 	} else {
3049 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3050 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3051 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3052 
3053 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3054 							QMAN_LDMA_SIZE_OFFSET);
3055 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3056 							QMAN_LDMA_SRC_OFFSET);
3057 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3058 							QMAN_LDMA_DST_OFFSET);
3059 
3060 		/* Configure RAZWI IRQ */
3061 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3062 		if (hdev->stop_on_err)
3063 			tpc_qm_err_cfg |=
3064 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3065 
3066 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3067 
3068 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3069 			lower_32_bits(CFG_BASE + irq_handler_offset));
3070 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3071 			upper_32_bits(CFG_BASE + irq_handler_offset));
3072 
3073 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3074 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3075 									tpc_id);
3076 
3077 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3078 				QM_ARB_ERR_MSG_EN_MASK);
3079 
3080 		/* Set timeout to maximum */
3081 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3082 
3083 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3084 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3085 				QMAN_INTERNAL_MAKE_TRUSTED);
3086 	}
3087 
3088 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3089 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3090 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3091 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3092 
3093 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3094 	if (tpc_id == 6) {
3095 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3096 				mtr_base_ws_lo);
3097 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3098 				mtr_base_ws_hi);
3099 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3100 				so_base_ws_lo);
3101 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3102 				so_base_ws_hi);
3103 	}
3104 }
3105 
3106 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3107 {
3108 	struct gaudi_device *gaudi = hdev->asic_specific;
3109 	struct gaudi_internal_qman_info *q;
3110 	u64 qman_base_addr;
3111 	u32 so_base_hi, tpc_offset = 0;
3112 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3113 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3114 	int i, tpc_id, internal_q_index;
3115 
3116 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3117 		return;
3118 
3119 	so_base_hi = upper_32_bits(CFG_BASE +
3120 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3121 
3122 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3123 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3124 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3125 						tpc_id * QMAN_STREAMS + i;
3126 			q = &gaudi->internal_qmans[internal_q_index];
3127 			qman_base_addr = (u64) q->pq_dma_addr;
3128 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3129 						qman_base_addr);
3130 
3131 			if (i == 3) {
3132 				/* Initializing lower CP for TPC QMAN */
3133 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3134 
3135 				/* Enable the QMAN and TPC channel */
3136 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3137 						QMAN_TPC_ENABLE);
3138 			}
3139 		}
3140 
3141 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3142 				so_base_hi);
3143 
3144 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3145 
3146 		gaudi->hw_cap_initialized |=
3147 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3148 	}
3149 }
3150 
3151 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3152 				int qman_id, u64 qman_base_addr, int nic_id)
3153 {
3154 	struct cpu_dyn_regs *dyn_regs =
3155 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3156 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3157 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3158 	u32 nic_qm_err_cfg, irq_handler_offset;
3159 	u32 q_off;
3160 
3161 	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3162 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3163 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3164 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3165 	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3166 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3167 	so_base_en_hi = upper_32_bits(CFG_BASE +
3168 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3169 	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3170 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3171 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3172 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3173 	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3174 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3175 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3176 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3177 
3178 	q_off = nic_offset + qman_id * 4;
3179 
3180 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3181 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3182 
3183 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3184 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3185 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3186 
3187 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3188 							QMAN_LDMA_SIZE_OFFSET);
3189 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3190 							QMAN_LDMA_SRC_OFFSET);
3191 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3192 							QMAN_LDMA_DST_OFFSET);
3193 
3194 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3195 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3196 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3197 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3198 
3199 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3200 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3201 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3202 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3203 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3204 
3205 	if (qman_id == 0) {
3206 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3207 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3208 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3209 
3210 		/* Configure RAZWI IRQ */
3211 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3212 		if (hdev->stop_on_err)
3213 			nic_qm_err_cfg |=
3214 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3215 
3216 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3217 
3218 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3219 			lower_32_bits(CFG_BASE + irq_handler_offset));
3220 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3221 			upper_32_bits(CFG_BASE + irq_handler_offset));
3222 
3223 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3224 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3225 									nic_id);
3226 
3227 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3228 				QM_ARB_ERR_MSG_EN_MASK);
3229 
3230 		/* Set timeout to maximum */
3231 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3232 
3233 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3234 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3235 				QMAN_INTERNAL_MAKE_TRUSTED);
3236 	}
3237 }
3238 
3239 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3240 {
3241 	struct gaudi_device *gaudi = hdev->asic_specific;
3242 	struct gaudi_internal_qman_info *q;
3243 	u64 qman_base_addr;
3244 	u32 nic_offset = 0;
3245 	u32 nic_delta_between_qmans =
3246 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3247 	u32 nic_delta_between_nics =
3248 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3249 	int i, nic_id, internal_q_index;
3250 
3251 	if (!hdev->nic_ports_mask)
3252 		return;
3253 
3254 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3255 		return;
3256 
3257 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3258 
3259 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3260 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3261 			nic_offset += nic_delta_between_qmans;
3262 			if (nic_id & 1) {
3263 				nic_offset -= (nic_delta_between_qmans * 2);
3264 				nic_offset += nic_delta_between_nics;
3265 			}
3266 			continue;
3267 		}
3268 
3269 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3270 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3271 						nic_id * QMAN_STREAMS + i;
3272 			q = &gaudi->internal_qmans[internal_q_index];
3273 			qman_base_addr = (u64) q->pq_dma_addr;
3274 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3275 						qman_base_addr, nic_id);
3276 		}
3277 
3278 		/* Enable the QMAN */
3279 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3280 
3281 		nic_offset += nic_delta_between_qmans;
3282 		if (nic_id & 1) {
3283 			nic_offset -= (nic_delta_between_qmans * 2);
3284 			nic_offset += nic_delta_between_nics;
3285 		}
3286 
3287 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3288 	}
3289 }
3290 
3291 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3292 {
3293 	struct gaudi_device *gaudi = hdev->asic_specific;
3294 
3295 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3296 		return;
3297 
3298 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3299 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3300 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3301 }
3302 
3303 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3304 {
3305 	struct gaudi_device *gaudi = hdev->asic_specific;
3306 
3307 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3308 		return;
3309 
3310 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3311 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3312 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3313 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3314 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3315 }
3316 
3317 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3318 {
3319 	struct gaudi_device *gaudi = hdev->asic_specific;
3320 
3321 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3322 		return;
3323 
3324 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3325 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3326 }
3327 
3328 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3329 {
3330 	struct gaudi_device *gaudi = hdev->asic_specific;
3331 	u32 tpc_offset = 0;
3332 	int tpc_id;
3333 
3334 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3335 		return;
3336 
3337 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3338 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3339 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3340 	}
3341 }
3342 
3343 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3344 {
3345 	struct gaudi_device *gaudi = hdev->asic_specific;
3346 	u32 nic_mask, nic_offset = 0;
3347 	u32 nic_delta_between_qmans =
3348 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3349 	u32 nic_delta_between_nics =
3350 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3351 	int nic_id;
3352 
3353 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3354 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3355 
3356 		if (gaudi->hw_cap_initialized & nic_mask)
3357 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3358 
3359 		nic_offset += nic_delta_between_qmans;
3360 		if (nic_id & 1) {
3361 			nic_offset -= (nic_delta_between_qmans * 2);
3362 			nic_offset += nic_delta_between_nics;
3363 		}
3364 	}
3365 }
3366 
3367 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3368 {
3369 	struct gaudi_device *gaudi = hdev->asic_specific;
3370 
3371 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3372 		return;
3373 
3374 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3375 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3376 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3377 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3378 }
3379 
3380 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3381 {
3382 	struct gaudi_device *gaudi = hdev->asic_specific;
3383 
3384 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3385 		return;
3386 
3387 	/* Stop CPs of HBM DMA QMANs */
3388 
3389 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3390 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3391 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3392 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3393 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3394 }
3395 
3396 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3397 {
3398 	struct gaudi_device *gaudi = hdev->asic_specific;
3399 
3400 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3401 		return;
3402 
3403 	/* Stop CPs of MME QMANs */
3404 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3405 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3406 }
3407 
3408 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3409 {
3410 	struct gaudi_device *gaudi = hdev->asic_specific;
3411 
3412 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3413 		return;
3414 
3415 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3419 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3420 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3421 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3422 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3423 }
3424 
3425 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3426 {
3427 	struct gaudi_device *gaudi = hdev->asic_specific;
3428 
3429 	/* Stop upper CPs of QMANs */
3430 
3431 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3432 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3433 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3434 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3435 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3436 
3437 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3438 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3439 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3440 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3441 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3442 
3443 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3444 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3445 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3446 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3447 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3448 
3449 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3450 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3451 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3452 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3453 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3454 
3455 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3456 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3457 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3458 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3459 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3460 
3461 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3462 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3463 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3464 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3465 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3466 
3467 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3468 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3469 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3470 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3471 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3472 
3473 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3474 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3475 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3476 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3477 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3478 
3479 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3480 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3481 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3482 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3483 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3484 
3485 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3486 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3487 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3488 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3489 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3490 }
3491 
3492 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3493 {
3494 	struct gaudi_device *gaudi = hdev->asic_specific;
3495 
3496 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3497 		return;
3498 
3499 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3500 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3501 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3502 }
3503 
3504 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3505 {
3506 	struct gaudi_device *gaudi = hdev->asic_specific;
3507 
3508 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3509 		return;
3510 
3511 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3512 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3513 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3514 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3515 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3516 }
3517 
3518 static void gaudi_mme_stall(struct hl_device *hdev)
3519 {
3520 	struct gaudi_device *gaudi = hdev->asic_specific;
3521 
3522 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3523 		return;
3524 
3525 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3526 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3535 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3536 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3537 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3538 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3539 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3540 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3541 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3542 }
3543 
3544 static void gaudi_tpc_stall(struct hl_device *hdev)
3545 {
3546 	struct gaudi_device *gaudi = hdev->asic_specific;
3547 
3548 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3549 		return;
3550 
3551 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3555 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3556 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3557 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3558 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3559 }
3560 
3561 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3562 {
3563 	u32 qman_offset;
3564 	int i;
3565 
3566 	if (hdev->asic_prop.fw_security_enabled)
3567 		return;
3568 
3569 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3570 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3571 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3572 
3573 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3574 	}
3575 
3576 	WREG32(mmMME0_QM_CGM_CFG, 0);
3577 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3578 	WREG32(mmMME2_QM_CGM_CFG, 0);
3579 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3580 
3581 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3582 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3583 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3584 
3585 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3586 	}
3587 }
3588 
3589 static void gaudi_enable_timestamp(struct hl_device *hdev)
3590 {
3591 	/* Disable the timestamp counter */
3592 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3593 
3594 	/* Zero the lower/upper parts of the 64-bit counter */
3595 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3596 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3597 
3598 	/* Enable the counter */
3599 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3600 }
3601 
3602 static void gaudi_disable_timestamp(struct hl_device *hdev)
3603 {
3604 	/* Disable the timestamp counter */
3605 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3606 }
3607 
3608 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3609 {
3610 	u32 wait_timeout_ms;
3611 
3612 	if (hdev->pldm)
3613 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3614 	else
3615 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3616 
3617 	if (fw_reset)
3618 		goto skip_engines;
3619 
3620 	gaudi_stop_nic_qmans(hdev);
3621 	gaudi_stop_mme_qmans(hdev);
3622 	gaudi_stop_tpc_qmans(hdev);
3623 	gaudi_stop_hbm_dma_qmans(hdev);
3624 	gaudi_stop_pci_dma_qmans(hdev);
3625 
3626 	msleep(wait_timeout_ms);
3627 
3628 	gaudi_pci_dma_stall(hdev);
3629 	gaudi_hbm_dma_stall(hdev);
3630 	gaudi_tpc_stall(hdev);
3631 	gaudi_mme_stall(hdev);
3632 
3633 	msleep(wait_timeout_ms);
3634 
3635 	gaudi_disable_nic_qmans(hdev);
3636 	gaudi_disable_mme_qmans(hdev);
3637 	gaudi_disable_tpc_qmans(hdev);
3638 	gaudi_disable_hbm_dma_qmans(hdev);
3639 	gaudi_disable_pci_dma_qmans(hdev);
3640 
3641 	gaudi_disable_timestamp(hdev);
3642 
3643 skip_engines:
3644 	gaudi_disable_msi(hdev);
3645 }
3646 
3647 static int gaudi_mmu_init(struct hl_device *hdev)
3648 {
3649 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3650 	struct gaudi_device *gaudi = hdev->asic_specific;
3651 	u64 hop0_addr;
3652 	int rc, i;
3653 
3654 	if (!hdev->mmu_enable)
3655 		return 0;
3656 
3657 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3658 		return 0;
3659 
3660 	for (i = 0 ; i < prop->max_asid ; i++) {
3661 		hop0_addr = prop->mmu_pgt_addr +
3662 				(i * prop->mmu_hop_table_size);
3663 
3664 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3665 		if (rc) {
3666 			dev_err(hdev->dev,
3667 				"failed to set hop0 addr for asid %d\n", i);
3668 			return rc;
3669 		}
3670 	}
3671 
3672 	/* init MMU cache manage page */
3673 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3674 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3675 
3676 	/* mem cache invalidation */
3677 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3678 
3679 	rc = hl_mmu_invalidate_cache(hdev, true, 0);
3680 	if (rc)
3681 		return rc;
3682 
3683 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3684 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3685 
3686 	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3687 
3688 	/*
3689 	 * The H/W expects the first PI after init to be 1. After wraparound
3690 	 * we'll write 0.
3691 	 */
3692 	gaudi->mmu_cache_inv_pi = 1;
3693 
3694 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3695 
3696 	return 0;
3697 }
3698 
3699 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3700 {
3701 	void __iomem *dst;
3702 
3703 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3704 
3705 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3706 }
3707 
3708 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3709 {
3710 	void __iomem *dst;
3711 
3712 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3713 
3714 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3715 }
3716 
3717 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3718 {
3719 	struct dynamic_fw_load_mgr *dynamic_loader;
3720 	struct cpu_dyn_regs *dyn_regs;
3721 
3722 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3723 
3724 	/*
3725 	 * here we update initial values for few specific dynamic regs (as
3726 	 * before reading the first descriptor from FW those value has to be
3727 	 * hard-coded) in later stages of the protocol those values will be
3728 	 * updated automatically by reading the FW descriptor so data there
3729 	 * will always be up-to-date
3730 	 */
3731 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3732 	dyn_regs->kmd_msg_to_cpu =
3733 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3734 	dyn_regs->cpu_cmd_status_to_host =
3735 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3736 
3737 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3738 }
3739 
3740 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3741 {
3742 	struct static_fw_load_mgr *static_loader;
3743 
3744 	static_loader = &hdev->fw_loader.static_loader;
3745 
3746 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3747 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3748 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3749 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3750 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3751 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3752 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3753 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3754 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3755 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3756 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3757 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3758 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3759 			GAUDI_PLDM_RESET_WAIT_MSEC :
3760 			GAUDI_CPU_RESET_WAIT_MSEC;
3761 }
3762 
3763 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3764 {
3765 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3766 
3767 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3768 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3769 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3770 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3771 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3772 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3773 }
3774 
3775 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3776 {
3777 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3778 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3779 
3780 	/* fill common fields */
3781 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3782 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3783 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3784 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3785 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3786 	fw_loader->skip_bmc = !hdev->bmc_enable;
3787 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3788 	fw_loader->dram_bar_id = HBM_BAR_ID;
3789 
3790 	if (prop->dynamic_fw_load)
3791 		gaudi_init_dynamic_firmware_loader(hdev);
3792 	else
3793 		gaudi_init_static_firmware_loader(hdev);
3794 }
3795 
3796 static int gaudi_init_cpu(struct hl_device *hdev)
3797 {
3798 	struct gaudi_device *gaudi = hdev->asic_specific;
3799 	int rc;
3800 
3801 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3802 		return 0;
3803 
3804 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3805 		return 0;
3806 
3807 	/*
3808 	 * The device CPU works with 40 bits addresses.
3809 	 * This register sets the extension to 50 bits.
3810 	 */
3811 	if (!hdev->asic_prop.fw_security_enabled)
3812 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3813 
3814 	rc = hl_fw_init_cpu(hdev);
3815 
3816 	if (rc)
3817 		return rc;
3818 
3819 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3820 
3821 	return 0;
3822 }
3823 
3824 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3825 {
3826 	struct cpu_dyn_regs *dyn_regs =
3827 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3828 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3829 	struct gaudi_device *gaudi = hdev->asic_specific;
3830 	u32 status, irq_handler_offset;
3831 	struct hl_eq *eq;
3832 	struct hl_hw_queue *cpu_pq =
3833 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3834 	int err;
3835 
3836 	if (!hdev->cpu_queues_enable)
3837 		return 0;
3838 
3839 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3840 		return 0;
3841 
3842 	eq = &hdev->event_queue;
3843 
3844 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3845 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3846 
3847 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3848 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3849 
3850 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3851 			lower_32_bits(hdev->cpu_accessible_dma_address));
3852 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3853 			upper_32_bits(hdev->cpu_accessible_dma_address));
3854 
3855 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3856 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3857 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3858 
3859 	/* Used for EQ CI */
3860 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3861 
3862 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3863 
3864 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3865 
3866 	irq_handler_offset = prop->gic_interrupts_enable ?
3867 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3868 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3869 
3870 	WREG32(irq_handler_offset,
3871 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3872 
3873 	err = hl_poll_timeout(
3874 		hdev,
3875 		mmCPU_IF_QUEUE_INIT,
3876 		status,
3877 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3878 		1000,
3879 		cpu_timeout);
3880 
3881 	if (err) {
3882 		dev_err(hdev->dev,
3883 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3884 		return -EIO;
3885 	}
3886 
3887 	/* update FW application security bits */
3888 	if (prop->fw_cpu_boot_dev_sts0_valid)
3889 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3890 	if (prop->fw_cpu_boot_dev_sts1_valid)
3891 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3892 
3893 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3894 	return 0;
3895 }
3896 
3897 static void gaudi_pre_hw_init(struct hl_device *hdev)
3898 {
3899 	/* Perform read from the device to make sure device is up */
3900 	RREG32(mmHW_STATE);
3901 
3902 	if (!hdev->asic_prop.fw_security_enabled) {
3903 		/* Set the access through PCI bars (Linux driver only) as
3904 		 * secured
3905 		 */
3906 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3907 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3908 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3909 
3910 		/* Perform read to flush the waiting writes to ensure
3911 		 * configuration was set in the device
3912 		 */
3913 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3914 	}
3915 
3916 	/*
3917 	 * Let's mark in the H/W that we have reached this point. We check
3918 	 * this value in the reset_before_init function to understand whether
3919 	 * we need to reset the chip before doing H/W init. This register is
3920 	 * cleared by the H/W upon H/W reset
3921 	 */
3922 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3923 }
3924 
3925 static int gaudi_hw_init(struct hl_device *hdev)
3926 {
3927 	struct gaudi_device *gaudi = hdev->asic_specific;
3928 	int rc;
3929 
3930 	gaudi_pre_hw_init(hdev);
3931 
3932 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3933 	 * So we set it here and if anyone tries to move it later to
3934 	 * a different address, there will be an error
3935 	 */
3936 	if (hdev->asic_prop.iatu_done_by_fw)
3937 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3938 
3939 	/*
3940 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
3941 	 * base address of dram
3942 	 */
3943 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3944 		dev_err(hdev->dev,
3945 			"failed to map HBM bar to DRAM base address\n");
3946 		return -EIO;
3947 	}
3948 
3949 	rc = gaudi_init_cpu(hdev);
3950 	if (rc) {
3951 		dev_err(hdev->dev, "failed to initialize CPU\n");
3952 		return rc;
3953 	}
3954 
3955 	/* In case the clock gating was enabled in preboot we need to disable
3956 	 * it here before touching the MME/TPC registers.
3957 	 */
3958 	gaudi_disable_clock_gating(hdev);
3959 
3960 	/* SRAM scrambler must be initialized after CPU is running from HBM */
3961 	gaudi_init_scrambler_sram(hdev);
3962 
3963 	/* This is here just in case we are working without CPU */
3964 	gaudi_init_scrambler_hbm(hdev);
3965 
3966 	gaudi_init_golden_registers(hdev);
3967 
3968 	rc = gaudi_mmu_init(hdev);
3969 	if (rc)
3970 		return rc;
3971 
3972 	gaudi_init_security(hdev);
3973 
3974 	gaudi_init_pci_dma_qmans(hdev);
3975 
3976 	gaudi_init_hbm_dma_qmans(hdev);
3977 
3978 	gaudi_init_mme_qmans(hdev);
3979 
3980 	gaudi_init_tpc_qmans(hdev);
3981 
3982 	gaudi_init_nic_qmans(hdev);
3983 
3984 	gaudi_enable_timestamp(hdev);
3985 
3986 	/* MSI must be enabled before CPU queues and NIC are initialized */
3987 	rc = gaudi_enable_msi(hdev);
3988 	if (rc)
3989 		goto disable_queues;
3990 
3991 	/* must be called after MSI was enabled */
3992 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3993 	if (rc) {
3994 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3995 			rc);
3996 		goto disable_msi;
3997 	}
3998 
3999 	/* Perform read from the device to flush all configuration */
4000 	RREG32(mmHW_STATE);
4001 
4002 	return 0;
4003 
4004 disable_msi:
4005 	gaudi_disable_msi(hdev);
4006 disable_queues:
4007 	gaudi_disable_mme_qmans(hdev);
4008 	gaudi_disable_pci_dma_qmans(hdev);
4009 
4010 	return rc;
4011 }
4012 
4013 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4014 {
4015 	struct cpu_dyn_regs *dyn_regs =
4016 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4017 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4018 	struct gaudi_device *gaudi = hdev->asic_specific;
4019 	bool driver_performs_reset;
4020 
4021 	if (!hard_reset) {
4022 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4023 		return 0;
4024 	}
4025 
4026 	if (hdev->pldm) {
4027 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4028 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4029 	} else {
4030 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4031 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4032 	}
4033 
4034 	if (fw_reset) {
4035 		dev_dbg(hdev->dev,
4036 			"Firmware performs HARD reset, going to wait %dms\n",
4037 			reset_timeout_ms);
4038 
4039 		goto skip_reset;
4040 	}
4041 
4042 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4043 					!hdev->asic_prop.hard_reset_done_by_fw);
4044 
4045 	/* Set device to handle FLR by H/W as we will put the device CPU to
4046 	 * halt mode
4047 	 */
4048 	if (driver_performs_reset)
4049 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4050 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4051 
4052 	/* If linux is loaded in the device CPU we need to communicate with it
4053 	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4054 	 * registers in case of old F/Ws
4055 	 */
4056 	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4057 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4058 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4059 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4060 
4061 		WREG32(irq_handler_offset,
4062 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4063 
4064 		/* This is a hail-mary attempt to revive the card in the small chance that the
4065 		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4066 		 * In that case, triggering reset through GIC won't help. We need to trigger the
4067 		 * reset as if Linux wasn't loaded.
4068 		 *
4069 		 * We do it only if the reset cause was HB, because that would be the indication
4070 		 * of such an event.
4071 		 *
4072 		 * In case watchdog hasn't expired but we still got HB, then this won't do any
4073 		 * damage.
4074 		 */
4075 		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4076 			if (hdev->asic_prop.hard_reset_done_by_fw)
4077 				hl_fw_ask_hard_reset_without_linux(hdev);
4078 			else
4079 				hl_fw_ask_halt_machine_without_linux(hdev);
4080 		}
4081 	} else {
4082 		if (hdev->asic_prop.hard_reset_done_by_fw)
4083 			hl_fw_ask_hard_reset_without_linux(hdev);
4084 		else
4085 			hl_fw_ask_halt_machine_without_linux(hdev);
4086 	}
4087 
4088 	if (driver_performs_reset) {
4089 
4090 		/* Configure the reset registers. Must be done as early as
4091 		 * possible in case we fail during H/W initialization
4092 		 */
4093 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4094 						(CFG_RST_H_DMA_MASK |
4095 						CFG_RST_H_MME_MASK |
4096 						CFG_RST_H_SM_MASK |
4097 						CFG_RST_H_TPC_7_MASK));
4098 
4099 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4100 
4101 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4102 						(CFG_RST_H_HBM_MASK |
4103 						CFG_RST_H_TPC_7_MASK |
4104 						CFG_RST_H_NIC_MASK |
4105 						CFG_RST_H_SM_MASK |
4106 						CFG_RST_H_DMA_MASK |
4107 						CFG_RST_H_MME_MASK |
4108 						CFG_RST_H_CPU_MASK |
4109 						CFG_RST_H_MMU_MASK));
4110 
4111 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4112 						(CFG_RST_L_IF_MASK |
4113 						CFG_RST_L_PSOC_MASK |
4114 						CFG_RST_L_TPC_MASK));
4115 
4116 		msleep(cpu_timeout_ms);
4117 
4118 		/* Tell ASIC not to re-initialize PCIe */
4119 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4120 
4121 		/* Restart BTL/BLR upon hard-reset */
4122 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4123 
4124 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4125 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4126 
4127 		dev_dbg(hdev->dev,
4128 			"Issued HARD reset command, going to wait %dms\n",
4129 			reset_timeout_ms);
4130 	} else {
4131 		dev_dbg(hdev->dev,
4132 			"Firmware performs HARD reset, going to wait %dms\n",
4133 			reset_timeout_ms);
4134 	}
4135 
4136 skip_reset:
4137 	/*
4138 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4139 	 * itself is in reset. Need to wait until the reset is deasserted
4140 	 */
4141 	msleep(reset_timeout_ms);
4142 
4143 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4144 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4145 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4146 		return -ETIMEDOUT;
4147 	}
4148 
4149 	if (gaudi) {
4150 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4151 						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4152 						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4153 						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4154 						HW_CAP_HBM_SCRAMBLER);
4155 
4156 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4157 
4158 		hdev->device_cpu_is_halted = false;
4159 	}
4160 	return 0;
4161 }
4162 
4163 static int gaudi_suspend(struct hl_device *hdev)
4164 {
4165 	int rc;
4166 
4167 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4168 	if (rc)
4169 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4170 
4171 	return rc;
4172 }
4173 
/*
 * gaudi_resume - re-initialize the iATU when the device is resumed
 * @hdev: habanalabs device structure
 *
 * Return: the value returned by gaudi_init_iatu().
 */
static int gaudi_resume(struct hl_device *hdev)
{
	int rc = gaudi_init_iatu(hdev);

	return rc;
}
4178 
4179 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4180 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4181 {
4182 	int rc;
4183 
4184 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4185 			VM_DONTCOPY | VM_NORESERVE);
4186 
4187 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4188 				(dma_addr - HOST_PHYS_BASE), size);
4189 	if (rc)
4190 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4191 
4192 	return rc;
4193 }
4194 
4195 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4196 {
4197 	struct cpu_dyn_regs *dyn_regs =
4198 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4199 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4200 	struct gaudi_device *gaudi = hdev->asic_specific;
4201 	bool invalid_queue = false;
4202 	int dma_id;
4203 
4204 	switch (hw_queue_id) {
4205 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4206 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4207 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4208 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4209 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4210 		break;
4211 
4212 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4213 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4214 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4215 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4216 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4217 		break;
4218 
4219 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4220 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4221 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4222 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4223 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4224 		break;
4225 
4226 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4227 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4228 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4229 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4230 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4231 		break;
4232 
4233 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4234 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4235 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4236 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4237 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4238 		break;
4239 
4240 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4241 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4242 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4243 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4244 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4245 		break;
4246 
4247 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4248 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4249 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4250 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4251 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4252 		break;
4253 
4254 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4255 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4256 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4257 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4258 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4259 		break;
4260 
4261 	case GAUDI_QUEUE_ID_CPU_PQ:
4262 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4263 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4264 		else
4265 			invalid_queue = true;
4266 		break;
4267 
4268 	case GAUDI_QUEUE_ID_MME_0_0:
4269 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4270 		break;
4271 
4272 	case GAUDI_QUEUE_ID_MME_0_1:
4273 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4274 		break;
4275 
4276 	case GAUDI_QUEUE_ID_MME_0_2:
4277 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4278 		break;
4279 
4280 	case GAUDI_QUEUE_ID_MME_0_3:
4281 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4282 		break;
4283 
4284 	case GAUDI_QUEUE_ID_MME_1_0:
4285 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4286 		break;
4287 
4288 	case GAUDI_QUEUE_ID_MME_1_1:
4289 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4290 		break;
4291 
4292 	case GAUDI_QUEUE_ID_MME_1_2:
4293 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4294 		break;
4295 
4296 	case GAUDI_QUEUE_ID_MME_1_3:
4297 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4298 		break;
4299 
4300 	case GAUDI_QUEUE_ID_TPC_0_0:
4301 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4302 		break;
4303 
4304 	case GAUDI_QUEUE_ID_TPC_0_1:
4305 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4306 		break;
4307 
4308 	case GAUDI_QUEUE_ID_TPC_0_2:
4309 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4310 		break;
4311 
4312 	case GAUDI_QUEUE_ID_TPC_0_3:
4313 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4314 		break;
4315 
4316 	case GAUDI_QUEUE_ID_TPC_1_0:
4317 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4318 		break;
4319 
4320 	case GAUDI_QUEUE_ID_TPC_1_1:
4321 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4322 		break;
4323 
4324 	case GAUDI_QUEUE_ID_TPC_1_2:
4325 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4326 		break;
4327 
4328 	case GAUDI_QUEUE_ID_TPC_1_3:
4329 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4330 		break;
4331 
4332 	case GAUDI_QUEUE_ID_TPC_2_0:
4333 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4334 		break;
4335 
4336 	case GAUDI_QUEUE_ID_TPC_2_1:
4337 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4338 		break;
4339 
4340 	case GAUDI_QUEUE_ID_TPC_2_2:
4341 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4342 		break;
4343 
4344 	case GAUDI_QUEUE_ID_TPC_2_3:
4345 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4346 		break;
4347 
4348 	case GAUDI_QUEUE_ID_TPC_3_0:
4349 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4350 		break;
4351 
4352 	case GAUDI_QUEUE_ID_TPC_3_1:
4353 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4354 		break;
4355 
4356 	case GAUDI_QUEUE_ID_TPC_3_2:
4357 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4358 		break;
4359 
4360 	case GAUDI_QUEUE_ID_TPC_3_3:
4361 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4362 		break;
4363 
4364 	case GAUDI_QUEUE_ID_TPC_4_0:
4365 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4366 		break;
4367 
4368 	case GAUDI_QUEUE_ID_TPC_4_1:
4369 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4370 		break;
4371 
4372 	case GAUDI_QUEUE_ID_TPC_4_2:
4373 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4374 		break;
4375 
4376 	case GAUDI_QUEUE_ID_TPC_4_3:
4377 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4378 		break;
4379 
4380 	case GAUDI_QUEUE_ID_TPC_5_0:
4381 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4382 		break;
4383 
4384 	case GAUDI_QUEUE_ID_TPC_5_1:
4385 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4386 		break;
4387 
4388 	case GAUDI_QUEUE_ID_TPC_5_2:
4389 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4390 		break;
4391 
4392 	case GAUDI_QUEUE_ID_TPC_5_3:
4393 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4394 		break;
4395 
4396 	case GAUDI_QUEUE_ID_TPC_6_0:
4397 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4398 		break;
4399 
4400 	case GAUDI_QUEUE_ID_TPC_6_1:
4401 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4402 		break;
4403 
4404 	case GAUDI_QUEUE_ID_TPC_6_2:
4405 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4406 		break;
4407 
4408 	case GAUDI_QUEUE_ID_TPC_6_3:
4409 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4410 		break;
4411 
4412 	case GAUDI_QUEUE_ID_TPC_7_0:
4413 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4414 		break;
4415 
4416 	case GAUDI_QUEUE_ID_TPC_7_1:
4417 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4418 		break;
4419 
4420 	case GAUDI_QUEUE_ID_TPC_7_2:
4421 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4422 		break;
4423 
4424 	case GAUDI_QUEUE_ID_TPC_7_3:
4425 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4426 		break;
4427 
4428 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4429 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4430 			invalid_queue = true;
4431 
4432 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4433 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4434 		break;
4435 
4436 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4437 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4438 			invalid_queue = true;
4439 
4440 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4441 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4442 		break;
4443 
4444 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4445 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4446 			invalid_queue = true;
4447 
4448 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4449 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4450 		break;
4451 
4452 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4453 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4454 			invalid_queue = true;
4455 
4456 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4457 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4458 		break;
4459 
4460 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4461 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4462 			invalid_queue = true;
4463 
4464 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4465 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4466 		break;
4467 
4468 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4469 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4470 			invalid_queue = true;
4471 
4472 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4473 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4474 		break;
4475 
4476 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4477 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4478 			invalid_queue = true;
4479 
4480 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4481 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4482 		break;
4483 
4484 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4485 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4486 			invalid_queue = true;
4487 
4488 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4489 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4490 		break;
4491 
4492 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4493 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4494 			invalid_queue = true;
4495 
4496 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4497 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4498 		break;
4499 
4500 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4501 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4502 			invalid_queue = true;
4503 
4504 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4505 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4506 		break;
4507 
4508 	default:
4509 		invalid_queue = true;
4510 	}
4511 
4512 	if (invalid_queue) {
4513 		/* Should never get here */
4514 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4515 			hw_queue_id);
4516 		return;
4517 	}
4518 
4519 	db_value = pi;
4520 
4521 	/* ring the doorbell */
4522 	WREG32(db_reg_offset, db_value);
4523 
4524 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4525 		/* make sure device CPU will read latest data from host */
4526 		mb();
4527 
4528 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4529 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4530 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4531 
4532 		WREG32(irq_handler_offset,
4533 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4534 	}
4535 }
4536 
4537 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4538 				struct hl_bd *bd)
4539 {
4540 	__le64 *pbd = (__le64 *) bd;
4541 
4542 	/* The QMANs are on the host memory so a simple copy suffice */
4543 	pqe[0] = pbd[0];
4544 	pqe[1] = pbd[1];
4545 }
4546 
4547 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4548 					dma_addr_t *dma_handle, gfp_t flags)
4549 {
4550 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4551 						dma_handle, flags);
4552 
4553 	/* Shift to the device's base physical address of host memory */
4554 	if (kernel_addr)
4555 		*dma_handle += HOST_PHYS_BASE;
4556 
4557 	return kernel_addr;
4558 }
4559 
4560 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4561 		void *cpu_addr, dma_addr_t dma_handle)
4562 {
4563 	/* Cancel the device's base physical address of host memory */
4564 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4565 
4566 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4567 }
4568 
4569 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4570 {
4571 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4572 	u64 cur_addr = prop->dram_user_base_address;
4573 	u32 chunk_size, busy;
4574 	int rc, dma_id;
4575 
4576 	while (cur_addr < prop->dram_end_address) {
4577 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4578 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4579 
4580 			chunk_size =
4581 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4582 
4583 			dev_dbg(hdev->dev,
4584 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4585 				cur_addr, cur_addr + chunk_size);
4586 
4587 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4588 					lower_32_bits(val));
4589 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4590 					upper_32_bits(val));
4591 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4592 						lower_32_bits(cur_addr));
4593 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4594 						upper_32_bits(cur_addr));
4595 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4596 					chunk_size);
4597 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4598 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4599 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4600 
4601 			cur_addr += chunk_size;
4602 
4603 			if (cur_addr == prop->dram_end_address)
4604 				break;
4605 		}
4606 
4607 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4608 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4609 
4610 			rc = hl_poll_timeout(
4611 				hdev,
4612 				mmDMA0_CORE_STS0 + dma_offset,
4613 				busy,
4614 				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4615 				1000,
4616 				HBM_SCRUBBING_TIMEOUT_US);
4617 
4618 			if (rc) {
4619 				dev_err(hdev->dev,
4620 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4621 					dma_id);
4622 				return -EIO;
4623 			}
4624 		}
4625 	}
4626 
4627 	return 0;
4628 }
4629 
4630 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4631 {
4632 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4633 	u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4634 			min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4635 	u64 addr, size, val = hdev->memory_scrub_val;
4636 	ktime_t timeout;
4637 	int rc = 0;
4638 
4639 	if (!hdev->memory_scrub)
4640 		return 0;
4641 
4642 	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4643 	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4644 		if (ktime_compare(ktime_get(), timeout) > 0) {
4645 			dev_err(hdev->dev, "waiting for idle timeout\n");
4646 			return -ETIMEDOUT;
4647 		}
4648 		usleep_range((1000 >> 2) + 1, 1000);
4649 	}
4650 
4651 	/* Scrub SRAM */
4652 	addr = prop->sram_user_base_address;
4653 	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4654 
4655 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4656 			addr, addr + size, val);
4657 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4658 	if (rc) {
4659 		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4660 		return rc;
4661 	}
4662 
4663 	/* Scrub HBM using all DMA channels in parallel */
4664 	rc = gaudi_scrub_device_dram(hdev, val);
4665 	if (rc) {
4666 		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4667 		return rc;
4668 	}
4669 
4670 	return 0;
4671 }
4672 
4673 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4674 				u32 queue_id, dma_addr_t *dma_handle,
4675 				u16 *queue_len)
4676 {
4677 	struct gaudi_device *gaudi = hdev->asic_specific;
4678 	struct gaudi_internal_qman_info *q;
4679 
4680 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4681 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4682 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4683 		return NULL;
4684 	}
4685 
4686 	q = &gaudi->internal_qmans[queue_id];
4687 	*dma_handle = q->pq_dma_addr;
4688 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4689 
4690 	return q->pq_kernel_addr;
4691 }
4692 
4693 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4694 				u16 len, u32 timeout, u64 *result)
4695 {
4696 	struct gaudi_device *gaudi = hdev->asic_specific;
4697 
4698 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4699 		if (result)
4700 			*result = 0;
4701 		return 0;
4702 	}
4703 
4704 	if (!timeout)
4705 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4706 
4707 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4708 						timeout, result);
4709 }
4710 
4711 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4712 {
4713 	struct packet_msg_prot *fence_pkt;
4714 	dma_addr_t pkt_dma_addr;
4715 	u32 fence_val, tmp, timeout_usec;
4716 	dma_addr_t fence_dma_addr;
4717 	u32 *fence_ptr;
4718 	int rc;
4719 
4720 	if (hdev->pldm)
4721 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4722 	else
4723 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4724 
4725 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4726 
4727 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4728 	if (!fence_ptr) {
4729 		dev_err(hdev->dev,
4730 			"Failed to allocate memory for H/W queue %d testing\n",
4731 			hw_queue_id);
4732 		return -ENOMEM;
4733 	}
4734 
4735 	*fence_ptr = 0;
4736 
4737 	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4738 						&pkt_dma_addr);
4739 	if (!fence_pkt) {
4740 		dev_err(hdev->dev,
4741 			"Failed to allocate packet for H/W queue %d testing\n",
4742 			hw_queue_id);
4743 		rc = -ENOMEM;
4744 		goto free_fence_ptr;
4745 	}
4746 
4747 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4748 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4749 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4750 
4751 	fence_pkt->ctl = cpu_to_le32(tmp);
4752 	fence_pkt->value = cpu_to_le32(fence_val);
4753 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4754 
4755 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4756 					sizeof(struct packet_msg_prot),
4757 					pkt_dma_addr);
4758 	if (rc) {
4759 		dev_err(hdev->dev,
4760 			"Failed to send fence packet to H/W queue %d\n",
4761 			hw_queue_id);
4762 		goto free_pkt;
4763 	}
4764 
4765 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4766 					1000, timeout_usec, true);
4767 
4768 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4769 
4770 	if (rc == -ETIMEDOUT) {
4771 		dev_err(hdev->dev,
4772 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4773 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4774 		rc = -EIO;
4775 	}
4776 
4777 free_pkt:
4778 	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4779 free_fence_ptr:
4780 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4781 	return rc;
4782 }
4783 
4784 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4785 {
4786 	struct gaudi_device *gaudi = hdev->asic_specific;
4787 
4788 	/*
4789 	 * check capability here as send_cpu_message() won't update the result
4790 	 * value if no capability
4791 	 */
4792 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4793 		return 0;
4794 
4795 	return hl_fw_test_cpu_queue(hdev);
4796 }
4797 
4798 static int gaudi_test_queues(struct hl_device *hdev)
4799 {
4800 	int i, rc, ret_val = 0;
4801 
4802 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4803 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4804 			rc = gaudi_test_queue(hdev, i);
4805 			if (rc)
4806 				ret_val = -EINVAL;
4807 		}
4808 	}
4809 
4810 	rc = gaudi_test_cpu_queue(hdev);
4811 	if (rc)
4812 		ret_val = -EINVAL;
4813 
4814 	return ret_val;
4815 }
4816 
4817 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4818 		gfp_t mem_flags, dma_addr_t *dma_handle)
4819 {
4820 	void *kernel_addr;
4821 
4822 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4823 		return NULL;
4824 
4825 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4826 
4827 	/* Shift to the device's base physical address of host memory */
4828 	if (kernel_addr)
4829 		*dma_handle += HOST_PHYS_BASE;
4830 
4831 	return kernel_addr;
4832 }
4833 
4834 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4835 			dma_addr_t dma_addr)
4836 {
4837 	/* Cancel the device's base physical address of host memory */
4838 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4839 
4840 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4841 }
4842 
4843 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4844 					size_t size, dma_addr_t *dma_handle)
4845 {
4846 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4847 }
4848 
4849 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4850 						size_t size, void *vaddr)
4851 {
4852 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4853 }
4854 
4855 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4856 {
4857 	struct scatterlist *sg, *sg_next_iter;
4858 	u32 count, dma_desc_cnt;
4859 	u64 len, len_next;
4860 	dma_addr_t addr, addr_next;
4861 
4862 	dma_desc_cnt = 0;
4863 
4864 	for_each_sgtable_dma_sg(sgt, sg, count) {
4865 		len = sg_dma_len(sg);
4866 		addr = sg_dma_address(sg);
4867 
4868 		if (len == 0)
4869 			break;
4870 
4871 		while ((count + 1) < sgt->nents) {
4872 			sg_next_iter = sg_next(sg);
4873 			len_next = sg_dma_len(sg_next_iter);
4874 			addr_next = sg_dma_address(sg_next_iter);
4875 
4876 			if (len_next == 0)
4877 				break;
4878 
4879 			if ((addr + len == addr_next) &&
4880 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4881 				len += len_next;
4882 				count++;
4883 				sg = sg_next_iter;
4884 			} else {
4885 				break;
4886 			}
4887 		}
4888 
4889 		dma_desc_cnt++;
4890 	}
4891 
4892 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4893 }
4894 
4895 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4896 				struct hl_cs_parser *parser,
4897 				struct packet_lin_dma *user_dma_pkt,
4898 				u64 addr, enum dma_data_direction dir)
4899 {
4900 	struct hl_userptr *userptr;
4901 	int rc;
4902 
4903 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4904 			parser->job_userptr_list, &userptr))
4905 		goto already_pinned;
4906 
4907 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4908 	if (!userptr)
4909 		return -ENOMEM;
4910 
4911 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4912 				userptr);
4913 	if (rc)
4914 		goto free_userptr;
4915 
4916 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4917 
4918 	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4919 	if (rc) {
4920 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4921 		goto unpin_memory;
4922 	}
4923 
4924 	userptr->dma_mapped = true;
4925 	userptr->dir = dir;
4926 
4927 already_pinned:
4928 	parser->patched_cb_size +=
4929 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4930 
4931 	return 0;
4932 
4933 unpin_memory:
4934 	list_del(&userptr->job_node);
4935 	hl_unpin_host_memory(hdev, userptr);
4936 free_userptr:
4937 	kfree(userptr);
4938 	return rc;
4939 }
4940 
4941 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4942 				struct hl_cs_parser *parser,
4943 				struct packet_lin_dma *user_dma_pkt,
4944 				bool src_in_host)
4945 {
4946 	enum dma_data_direction dir;
4947 	bool skip_host_mem_pin = false, user_memset;
4948 	u64 addr;
4949 	int rc = 0;
4950 
4951 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4952 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4953 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4954 
4955 	if (src_in_host) {
4956 		if (user_memset)
4957 			skip_host_mem_pin = true;
4958 
4959 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4960 		dir = DMA_TO_DEVICE;
4961 		addr = le64_to_cpu(user_dma_pkt->src_addr);
4962 	} else {
4963 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4964 		dir = DMA_FROM_DEVICE;
4965 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4966 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4967 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4968 	}
4969 
4970 	if (skip_host_mem_pin)
4971 		parser->patched_cb_size += sizeof(*user_dma_pkt);
4972 	else
4973 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4974 						addr, dir);
4975 
4976 	return rc;
4977 }
4978 
4979 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4980 				struct hl_cs_parser *parser,
4981 				struct packet_lin_dma *user_dma_pkt)
4982 {
4983 	bool src_in_host = false;
4984 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4985 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4986 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4987 
4988 	dev_dbg(hdev->dev, "DMA packet details:\n");
4989 	dev_dbg(hdev->dev, "source == 0x%llx\n",
4990 				le64_to_cpu(user_dma_pkt->src_addr));
4991 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4992 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4993 
4994 	/*
4995 	 * Special handling for DMA with size 0. Bypass all validations
4996 	 * because no transactions will be done except for WR_COMP, which
4997 	 * is not a security issue
4998 	 */
4999 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
5000 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5001 		return 0;
5002 	}
5003 
5004 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5005 		src_in_host = true;
5006 
5007 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5008 						src_in_host);
5009 }
5010 
5011 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5012 					struct hl_cs_parser *parser,
5013 					struct packet_load_and_exe *user_pkt)
5014 {
5015 	u32 cfg;
5016 
5017 	cfg = le32_to_cpu(user_pkt->cfg);
5018 
5019 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5020 		dev_err(hdev->dev,
5021 			"User not allowed to use Load and Execute\n");
5022 		return -EPERM;
5023 	}
5024 
5025 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5026 
5027 	return 0;
5028 }
5029 
5030 static int gaudi_validate_cb(struct hl_device *hdev,
5031 			struct hl_cs_parser *parser, bool is_mmu)
5032 {
5033 	u32 cb_parsed_length = 0;
5034 	int rc = 0;
5035 
5036 	parser->patched_cb_size = 0;
5037 
5038 	/* cb_user_size is more than 0 so loop will always be executed */
5039 	while (cb_parsed_length < parser->user_cb_size) {
5040 		enum packet_id pkt_id;
5041 		u16 pkt_size;
5042 		struct gaudi_packet *user_pkt;
5043 
5044 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5045 
5046 		pkt_id = (enum packet_id) (
5047 				(le64_to_cpu(user_pkt->header) &
5048 				PACKET_HEADER_PACKET_ID_MASK) >>
5049 					PACKET_HEADER_PACKET_ID_SHIFT);
5050 
5051 		if (!validate_packet_id(pkt_id)) {
5052 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5053 			rc = -EINVAL;
5054 			break;
5055 		}
5056 
5057 		pkt_size = gaudi_packet_sizes[pkt_id];
5058 		cb_parsed_length += pkt_size;
5059 		if (cb_parsed_length > parser->user_cb_size) {
5060 			dev_err(hdev->dev,
5061 				"packet 0x%x is out of CB boundary\n", pkt_id);
5062 			rc = -EINVAL;
5063 			break;
5064 		}
5065 
5066 		switch (pkt_id) {
5067 		case PACKET_MSG_PROT:
5068 			dev_err(hdev->dev,
5069 				"User not allowed to use MSG_PROT\n");
5070 			rc = -EPERM;
5071 			break;
5072 
5073 		case PACKET_CP_DMA:
5074 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5075 			rc = -EPERM;
5076 			break;
5077 
5078 		case PACKET_STOP:
5079 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5080 			rc = -EPERM;
5081 			break;
5082 
5083 		case PACKET_WREG_BULK:
5084 			dev_err(hdev->dev,
5085 				"User not allowed to use WREG_BULK\n");
5086 			rc = -EPERM;
5087 			break;
5088 
5089 		case PACKET_LOAD_AND_EXE:
5090 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5091 				(struct packet_load_and_exe *) user_pkt);
5092 			break;
5093 
5094 		case PACKET_LIN_DMA:
5095 			parser->contains_dma_pkt = true;
5096 			if (is_mmu)
5097 				parser->patched_cb_size += pkt_size;
5098 			else
5099 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5100 					(struct packet_lin_dma *) user_pkt);
5101 			break;
5102 
5103 		case PACKET_WREG_32:
5104 		case PACKET_MSG_LONG:
5105 		case PACKET_MSG_SHORT:
5106 		case PACKET_REPEAT:
5107 		case PACKET_FENCE:
5108 		case PACKET_NOP:
5109 		case PACKET_ARB_POINT:
5110 			parser->patched_cb_size += pkt_size;
5111 			break;
5112 
5113 		default:
5114 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5115 				pkt_id);
5116 			rc = -EINVAL;
5117 			break;
5118 		}
5119 
5120 		if (rc)
5121 			break;
5122 	}
5123 
5124 	/*
5125 	 * The new CB should have space at the end for two MSG_PROT packets:
5126 	 * 1. Optional NOP padding for cacheline alignment
5127 	 * 2. A packet that will act as a completion packet
5128 	 * 3. A packet that will generate MSI interrupt
5129 	 */
5130 	if (parser->completion)
5131 		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5132 			parser->patched_cb_size);
5133 
5134 	return rc;
5135 }
5136 
5137 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5138 				struct hl_cs_parser *parser,
5139 				struct packet_lin_dma *user_dma_pkt,
5140 				struct packet_lin_dma *new_dma_pkt,
5141 				u32 *new_dma_pkt_size)
5142 {
5143 	struct hl_userptr *userptr;
5144 	struct scatterlist *sg, *sg_next_iter;
5145 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5146 	u64 len, len_next;
5147 	dma_addr_t dma_addr, dma_addr_next;
5148 	u64 device_memory_addr, addr;
5149 	enum dma_data_direction dir;
5150 	struct sg_table *sgt;
5151 	bool src_in_host = false;
5152 	bool skip_host_mem_pin = false;
5153 	bool user_memset;
5154 
5155 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5156 
5157 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5158 		src_in_host = true;
5159 
5160 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5161 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5162 
5163 	if (src_in_host) {
5164 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5165 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5166 		dir = DMA_TO_DEVICE;
5167 		if (user_memset)
5168 			skip_host_mem_pin = true;
5169 	} else {
5170 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5171 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5172 		dir = DMA_FROM_DEVICE;
5173 	}
5174 
5175 	if ((!skip_host_mem_pin) &&
5176 		(!hl_userptr_is_pinned(hdev, addr,
5177 					le32_to_cpu(user_dma_pkt->tsize),
5178 					parser->job_userptr_list, &userptr))) {
5179 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5180 				addr, user_dma_pkt->tsize);
5181 		return -EFAULT;
5182 	}
5183 
5184 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5185 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5186 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5187 		return 0;
5188 	}
5189 
5190 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5191 
5192 	sgt = userptr->sgt;
5193 	dma_desc_cnt = 0;
5194 
5195 	for_each_sgtable_dma_sg(sgt, sg, count) {
5196 		len = sg_dma_len(sg);
5197 		dma_addr = sg_dma_address(sg);
5198 
5199 		if (len == 0)
5200 			break;
5201 
5202 		while ((count + 1) < sgt->nents) {
5203 			sg_next_iter = sg_next(sg);
5204 			len_next = sg_dma_len(sg_next_iter);
5205 			dma_addr_next = sg_dma_address(sg_next_iter);
5206 
5207 			if (len_next == 0)
5208 				break;
5209 
5210 			if ((dma_addr + len == dma_addr_next) &&
5211 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5212 				len += len_next;
5213 				count++;
5214 				sg = sg_next_iter;
5215 			} else {
5216 				break;
5217 			}
5218 		}
5219 
5220 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5221 		if (likely(dma_desc_cnt))
5222 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5223 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5224 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5225 		new_dma_pkt->tsize = cpu_to_le32(len);
5226 
5227 		if (dir == DMA_TO_DEVICE) {
5228 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5229 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5230 		} else {
5231 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5232 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5233 		}
5234 
5235 		if (!user_memset)
5236 			device_memory_addr += len;
5237 		dma_desc_cnt++;
5238 		new_dma_pkt++;
5239 	}
5240 
5241 	if (!dma_desc_cnt) {
5242 		dev_err(hdev->dev,
5243 			"Error of 0 SG entries when patching DMA packet\n");
5244 		return -EFAULT;
5245 	}
5246 
5247 	/* Fix the last dma packet - wrcomp must be as user set it */
5248 	new_dma_pkt--;
5249 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5250 
5251 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5252 
5253 	return 0;
5254 }
5255 
5256 static int gaudi_patch_cb(struct hl_device *hdev,
5257 				struct hl_cs_parser *parser)
5258 {
5259 	u32 cb_parsed_length = 0;
5260 	u32 cb_patched_cur_length = 0;
5261 	int rc = 0;
5262 
5263 	/* cb_user_size is more than 0 so loop will always be executed */
5264 	while (cb_parsed_length < parser->user_cb_size) {
5265 		enum packet_id pkt_id;
5266 		u16 pkt_size;
5267 		u32 new_pkt_size = 0;
5268 		struct gaudi_packet *user_pkt, *kernel_pkt;
5269 
5270 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5271 		kernel_pkt = parser->patched_cb->kernel_address +
5272 					cb_patched_cur_length;
5273 
5274 		pkt_id = (enum packet_id) (
5275 				(le64_to_cpu(user_pkt->header) &
5276 				PACKET_HEADER_PACKET_ID_MASK) >>
5277 					PACKET_HEADER_PACKET_ID_SHIFT);
5278 
5279 		if (!validate_packet_id(pkt_id)) {
5280 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5281 			rc = -EINVAL;
5282 			break;
5283 		}
5284 
5285 		pkt_size = gaudi_packet_sizes[pkt_id];
5286 		cb_parsed_length += pkt_size;
5287 		if (cb_parsed_length > parser->user_cb_size) {
5288 			dev_err(hdev->dev,
5289 				"packet 0x%x is out of CB boundary\n", pkt_id);
5290 			rc = -EINVAL;
5291 			break;
5292 		}
5293 
5294 		switch (pkt_id) {
5295 		case PACKET_LIN_DMA:
5296 			rc = gaudi_patch_dma_packet(hdev, parser,
5297 					(struct packet_lin_dma *) user_pkt,
5298 					(struct packet_lin_dma *) kernel_pkt,
5299 					&new_pkt_size);
5300 			cb_patched_cur_length += new_pkt_size;
5301 			break;
5302 
5303 		case PACKET_MSG_PROT:
5304 			dev_err(hdev->dev,
5305 				"User not allowed to use MSG_PROT\n");
5306 			rc = -EPERM;
5307 			break;
5308 
5309 		case PACKET_CP_DMA:
5310 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5311 			rc = -EPERM;
5312 			break;
5313 
5314 		case PACKET_STOP:
5315 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5316 			rc = -EPERM;
5317 			break;
5318 
5319 		case PACKET_WREG_32:
5320 		case PACKET_WREG_BULK:
5321 		case PACKET_MSG_LONG:
5322 		case PACKET_MSG_SHORT:
5323 		case PACKET_REPEAT:
5324 		case PACKET_FENCE:
5325 		case PACKET_NOP:
5326 		case PACKET_ARB_POINT:
5327 		case PACKET_LOAD_AND_EXE:
5328 			memcpy(kernel_pkt, user_pkt, pkt_size);
5329 			cb_patched_cur_length += pkt_size;
5330 			break;
5331 
5332 		default:
5333 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5334 				pkt_id);
5335 			rc = -EINVAL;
5336 			break;
5337 		}
5338 
5339 		if (rc)
5340 			break;
5341 	}
5342 
5343 	return rc;
5344 }
5345 
5346 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5347 		struct hl_cs_parser *parser)
5348 {
5349 	u64 handle;
5350 	u32 patched_cb_size;
5351 	struct hl_cb *user_cb;
5352 	int rc;
5353 
5354 	/*
5355 	 * The new CB should have space at the end for two MSG_PROT packets:
5356 	 * 1. Optional NOP padding for cacheline alignment
5357 	 * 2. A packet that will act as a completion packet
5358 	 * 3. A packet that will generate MSI interrupt
5359 	 */
5360 	if (parser->completion)
5361 		parser->patched_cb_size = parser->user_cb_size +
5362 				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5363 	else
5364 		parser->patched_cb_size = parser->user_cb_size;
5365 
5366 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5367 				parser->patched_cb_size, false, false,
5368 				&handle);
5369 
5370 	if (rc) {
5371 		dev_err(hdev->dev,
5372 			"Failed to allocate patched CB for DMA CS %d\n",
5373 			rc);
5374 		return rc;
5375 	}
5376 
5377 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5378 	/* hl_cb_get should never fail */
5379 	if (!parser->patched_cb) {
5380 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5381 		rc = -EFAULT;
5382 		goto out;
5383 	}
5384 
5385 	/*
5386 	 * We are protected from overflow because the check
5387 	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5388 	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5389 	 *
5390 	 * There is no option to reach here without going through that check because:
5391 	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5392 	 *    an external queue.
5393 	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5394 	 */
5395 	memcpy(parser->patched_cb->kernel_address,
5396 		parser->user_cb->kernel_address,
5397 		parser->user_cb_size);
5398 
5399 	patched_cb_size = parser->patched_cb_size;
5400 
5401 	/* Validate patched CB instead of user CB */
5402 	user_cb = parser->user_cb;
5403 	parser->user_cb = parser->patched_cb;
5404 	rc = gaudi_validate_cb(hdev, parser, true);
5405 	parser->user_cb = user_cb;
5406 
5407 	if (rc) {
5408 		hl_cb_put(parser->patched_cb);
5409 		goto out;
5410 	}
5411 
5412 	if (patched_cb_size != parser->patched_cb_size) {
5413 		dev_err(hdev->dev, "user CB size mismatch\n");
5414 		hl_cb_put(parser->patched_cb);
5415 		rc = -EINVAL;
5416 		goto out;
5417 	}
5418 
5419 out:
5420 	/*
5421 	 * Always call cb destroy here because we still have 1 reference
5422 	 * to it by calling cb_get earlier. After the job will be completed,
5423 	 * cb_put will release it, but here we want to remove it from the
5424 	 * idr
5425 	 */
5426 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5427 
5428 	return rc;
5429 }
5430 
5431 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5432 		struct hl_cs_parser *parser)
5433 {
5434 	u64 handle;
5435 	int rc;
5436 
5437 	rc = gaudi_validate_cb(hdev, parser, false);
5438 
5439 	if (rc)
5440 		goto free_userptr;
5441 
5442 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5443 				parser->patched_cb_size, false, false,
5444 				&handle);
5445 	if (rc) {
5446 		dev_err(hdev->dev,
5447 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5448 		goto free_userptr;
5449 	}
5450 
5451 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5452 	/* hl_cb_get should never fail here */
5453 	if (!parser->patched_cb) {
5454 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5455 		rc = -EFAULT;
5456 		goto out;
5457 	}
5458 
5459 	rc = gaudi_patch_cb(hdev, parser);
5460 
5461 	if (rc)
5462 		hl_cb_put(parser->patched_cb);
5463 
5464 out:
5465 	/*
5466 	 * Always call cb destroy here because we still have 1 reference
5467 	 * to it by calling cb_get earlier. After the job will be completed,
5468 	 * cb_put will release it, but here we want to remove it from the
5469 	 * idr
5470 	 */
5471 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5472 
5473 free_userptr:
5474 	if (rc)
5475 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5476 	return rc;
5477 }
5478 
5479 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5480 					struct hl_cs_parser *parser)
5481 {
5482 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5483 	struct gaudi_device *gaudi = hdev->asic_specific;
5484 	u32 nic_queue_offset, nic_mask_q_id;
5485 
5486 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5487 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5488 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5489 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5490 
5491 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5492 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5493 			return -EINVAL;
5494 		}
5495 	}
5496 
5497 	/* For internal queue jobs just check if CB address is valid */
5498 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5499 					parser->user_cb_size,
5500 					asic_prop->sram_user_base_address,
5501 					asic_prop->sram_end_address))
5502 		return 0;
5503 
5504 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5505 					parser->user_cb_size,
5506 					asic_prop->dram_user_base_address,
5507 					asic_prop->dram_end_address))
5508 		return 0;
5509 
5510 	/* PMMU and HPMMU addresses are equal, check only one of them */
5511 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5512 					parser->user_cb_size,
5513 					asic_prop->pmmu.start_addr,
5514 					asic_prop->pmmu.end_addr))
5515 		return 0;
5516 
5517 	dev_err(hdev->dev,
5518 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5519 		parser->user_cb, parser->user_cb_size);
5520 
5521 	return -EFAULT;
5522 }
5523 
5524 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5525 {
5526 	struct gaudi_device *gaudi = hdev->asic_specific;
5527 
5528 	if (parser->queue_type == QUEUE_TYPE_INT)
5529 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5530 
5531 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5532 		return gaudi_parse_cb_mmu(hdev, parser);
5533 	else
5534 		return gaudi_parse_cb_no_mmu(hdev, parser);
5535 }
5536 
5537 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5538 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5539 				u32 msi_vec, bool eb)
5540 {
5541 	struct packet_msg_prot *cq_pkt;
5542 	struct packet_nop *cq_padding;
5543 	u64 msi_addr;
5544 	u32 tmp;
5545 
5546 	cq_padding = kernel_address + original_len;
5547 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5548 
5549 	while ((void *)cq_padding < (void *)cq_pkt) {
5550 		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5551 		cq_padding++;
5552 	}
5553 
5554 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5555 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5556 
5557 	if (eb)
5558 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5559 
5560 	cq_pkt->ctl = cpu_to_le32(tmp);
5561 	cq_pkt->value = cpu_to_le32(cq_val);
5562 	cq_pkt->addr = cpu_to_le64(cq_addr);
5563 
5564 	cq_pkt++;
5565 
5566 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5567 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5568 	cq_pkt->ctl = cpu_to_le32(tmp);
5569 	cq_pkt->value = cpu_to_le32(1);
5570 	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5571 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5572 }
5573 
5574 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5575 {
5576 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5577 }
5578 
5579 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5580 					u32 size, u64 val)
5581 {
5582 	struct packet_lin_dma *lin_dma_pkt;
5583 	struct hl_cs_job *job;
5584 	u32 cb_size, ctl, err_cause;
5585 	struct hl_cb *cb;
5586 	int rc;
5587 
5588 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5589 	if (!cb)
5590 		return -EFAULT;
5591 
5592 	lin_dma_pkt = cb->kernel_address;
5593 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5594 	cb_size = sizeof(*lin_dma_pkt);
5595 
5596 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5597 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5598 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5599 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5600 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5601 
5602 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5603 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5604 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5605 	lin_dma_pkt->tsize = cpu_to_le32(size);
5606 
5607 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5608 	if (!job) {
5609 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5610 		rc = -ENOMEM;
5611 		goto release_cb;
5612 	}
5613 
5614 	/* Verify DMA is OK */
5615 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5616 	if (err_cause && !hdev->init_done) {
5617 		dev_dbg(hdev->dev,
5618 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5619 			err_cause);
5620 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5621 	}
5622 
5623 	job->id = 0;
5624 	job->user_cb = cb;
5625 	atomic_inc(&job->user_cb->cs_cnt);
5626 	job->user_cb_size = cb_size;
5627 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5628 	job->patched_cb = job->user_cb;
5629 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5630 
5631 	hl_debugfs_add_job(hdev, job);
5632 
5633 	rc = gaudi_send_job_on_qman0(hdev, job);
5634 	hl_debugfs_remove_job(hdev, job);
5635 	kfree(job);
5636 	atomic_dec(&cb->cs_cnt);
5637 
5638 	/* Verify DMA is OK */
5639 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5640 	if (err_cause) {
5641 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5642 		rc = -EIO;
5643 		if (!hdev->init_done) {
5644 			dev_dbg(hdev->dev,
5645 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5646 				err_cause);
5647 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5648 		}
5649 	}
5650 
5651 release_cb:
5652 	hl_cb_put(cb);
5653 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5654 
5655 	return rc;
5656 }
5657 
5658 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5659 					u32 num_regs, u32 val)
5660 {
5661 	struct packet_msg_long *pkt;
5662 	struct hl_cs_job *job;
5663 	u32 cb_size, ctl;
5664 	struct hl_cb *cb;
5665 	int i, rc;
5666 
5667 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5668 
5669 	if (cb_size > SZ_2M) {
5670 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5671 		return -ENOMEM;
5672 	}
5673 
5674 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5675 	if (!cb)
5676 		return -EFAULT;
5677 
5678 	pkt = cb->kernel_address;
5679 
5680 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5681 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5682 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5683 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5684 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5685 
5686 	for (i = 0; i < num_regs ; i++, pkt++) {
5687 		pkt->ctl = cpu_to_le32(ctl);
5688 		pkt->value = cpu_to_le32(val);
5689 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5690 	}
5691 
5692 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5693 	if (!job) {
5694 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5695 		rc = -ENOMEM;
5696 		goto release_cb;
5697 	}
5698 
5699 	job->id = 0;
5700 	job->user_cb = cb;
5701 	atomic_inc(&job->user_cb->cs_cnt);
5702 	job->user_cb_size = cb_size;
5703 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5704 	job->patched_cb = job->user_cb;
5705 	job->job_cb_size = cb_size;
5706 
5707 	hl_debugfs_add_job(hdev, job);
5708 
5709 	rc = gaudi_send_job_on_qman0(hdev, job);
5710 	hl_debugfs_remove_job(hdev, job);
5711 	kfree(job);
5712 	atomic_dec(&cb->cs_cnt);
5713 
5714 release_cb:
5715 	hl_cb_put(cb);
5716 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5717 
5718 	return rc;
5719 }
5720 
5721 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5722 {
5723 	u64 base_addr;
5724 	u32 num_regs;
5725 	int rc;
5726 
5727 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5728 	num_regs = NUM_OF_SOB_IN_BLOCK;
5729 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5730 	if (rc) {
5731 		dev_err(hdev->dev, "failed resetting SM registers");
5732 		return -ENOMEM;
5733 	}
5734 
5735 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5736 	num_regs = NUM_OF_SOB_IN_BLOCK;
5737 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5738 	if (rc) {
5739 		dev_err(hdev->dev, "failed resetting SM registers");
5740 		return -ENOMEM;
5741 	}
5742 
5743 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5744 	num_regs = NUM_OF_SOB_IN_BLOCK;
5745 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5746 	if (rc) {
5747 		dev_err(hdev->dev, "failed resetting SM registers");
5748 		return -ENOMEM;
5749 	}
5750 
5751 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5752 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5753 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5754 	if (rc) {
5755 		dev_err(hdev->dev, "failed resetting SM registers");
5756 		return -ENOMEM;
5757 	}
5758 
5759 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5760 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5761 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5762 	if (rc) {
5763 		dev_err(hdev->dev, "failed resetting SM registers");
5764 		return -ENOMEM;
5765 	}
5766 
5767 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5768 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5769 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5770 	if (rc) {
5771 		dev_err(hdev->dev, "failed resetting SM registers");
5772 		return -ENOMEM;
5773 	}
5774 
5775 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5776 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5777 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5778 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5779 	if (rc) {
5780 		dev_err(hdev->dev, "failed resetting SM registers");
5781 		return -ENOMEM;
5782 	}
5783 
5784 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5785 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5786 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5787 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5788 	if (rc) {
5789 		dev_err(hdev->dev, "failed resetting SM registers");
5790 		return -ENOMEM;
5791 	}
5792 
5793 	return 0;
5794 }
5795 
5796 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5797 {
5798 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5799 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5800 	int i;
5801 
5802 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5803 		u64 sob_addr = CFG_BASE +
5804 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5805 				(i * sob_delta);
5806 		u32 dma_offset = i * DMA_CORE_OFFSET;
5807 
5808 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5809 				lower_32_bits(sob_addr));
5810 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5811 				upper_32_bits(sob_addr));
5812 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5813 
5814 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5815 		 * modified by the user for SRAM reduction
5816 		 */
5817 		if (i > 1)
5818 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5819 								0x00000001);
5820 	}
5821 }
5822 
5823 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5824 {
5825 	u32 qman_offset;
5826 	int i;
5827 
5828 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5829 		qman_offset = i * DMA_QMAN_OFFSET;
5830 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5831 	}
5832 
5833 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5834 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5835 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5836 	}
5837 
5838 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5839 		qman_offset = i * TPC_QMAN_OFFSET;
5840 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5841 	}
5842 
5843 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5844 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5845 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5846 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5847 	}
5848 }
5849 
/*
 * gaudi_restore_user_registers() - restore the SM, DMA and QM registers.
 * @hdev: habanalabs device structure.
 *
 * The DMA and QM registers are restored only if restoring the sync manager
 * registers succeeded.
 *
 * Return: 0 on success, otherwise the error from gaudi_restore_sm_registers().
 */
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc = gaudi_restore_sm_registers(hdev);

	if (!rc) {
		gaudi_restore_dma_registers(hdev);
		gaudi_restore_qm_registers(hdev);
	}

	return rc;
}
5863 
5864 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5865 {
5866 	return 0;
5867 }
5868 
5869 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5870 {
5871 	u32 size = hdev->asic_prop.mmu_pgt_size +
5872 			hdev->asic_prop.mmu_cache_mng_size;
5873 	struct gaudi_device *gaudi = hdev->asic_specific;
5874 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5875 
5876 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5877 		return 0;
5878 
5879 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5880 }
5881 
/*
 * gaudi_restore_phase_topology() - empty hook, nothing to restore on Gaudi.
 * @hdev: habanalabs device structure (unused).
 */
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{
	/* Intentionally left empty */
}
5886 
5887 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5888 					u32 size_to_dma, dma_addr_t dma_addr)
5889 {
5890 	u32 err_cause, val;
5891 	u64 dma_offset;
5892 	int rc;
5893 
5894 	dma_offset = dma_id * DMA_CORE_OFFSET;
5895 
5896 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5897 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5898 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5899 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5900 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5901 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5902 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5903 
5904 	rc = hl_poll_timeout(
5905 		hdev,
5906 		mmDMA0_CORE_STS0 + dma_offset,
5907 		val,
5908 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5909 		0,
5910 		1000000);
5911 
5912 	if (rc) {
5913 		dev_err(hdev->dev,
5914 			"DMA %d timed-out during reading of 0x%llx\n",
5915 			dma_id, addr);
5916 		return -EIO;
5917 	}
5918 
5919 	/* Verify DMA is OK */
5920 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5921 	if (err_cause) {
5922 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5923 		dev_dbg(hdev->dev,
5924 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5925 			err_cause);
5926 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5927 
5928 		return -EIO;
5929 	}
5930 
5931 	return 0;
5932 }
5933 
5934 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5935 				void *blob_addr)
5936 {
5937 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5938 	u32 qm_glbl_sts0, qm_cgm_sts;
5939 	u64 dma_offset, qm_offset;
5940 	dma_addr_t dma_addr;
5941 	void *kernel_addr;
5942 	bool is_eng_idle;
5943 	int rc = 0, dma_id;
5944 
5945 	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5946 
5947 	if (!kernel_addr)
5948 		return -ENOMEM;
5949 
5950 	hdev->asic_funcs->hw_queues_lock(hdev);
5951 
5952 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5953 	dma_offset = dma_id * DMA_CORE_OFFSET;
5954 	qm_offset = dma_id * DMA_QMAN_OFFSET;
5955 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5956 	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5957 	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5958 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5959 		      IS_DMA_IDLE(dma_core_sts0);
5960 
5961 	if (!is_eng_idle) {
5962 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5963 		dma_offset = dma_id * DMA_CORE_OFFSET;
5964 		qm_offset = dma_id * DMA_QMAN_OFFSET;
5965 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5966 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5967 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5968 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5969 			      IS_DMA_IDLE(dma_core_sts0);
5970 
5971 		if (!is_eng_idle) {
5972 			dev_err_ratelimited(hdev->dev,
5973 				"Can't read via DMA because it is BUSY\n");
5974 			rc = -EAGAIN;
5975 			goto out;
5976 		}
5977 	}
5978 
5979 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5980 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5981 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5982 
5983 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
5984 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
5985 	 * ASID
5986 	 */
5987 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5988 
5989 	/* Verify DMA is OK */
5990 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5991 	if (err_cause) {
5992 		dev_dbg(hdev->dev,
5993 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5994 			err_cause);
5995 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5996 	}
5997 
5998 	pos = 0;
5999 	size_left = size;
6000 	size_to_dma = SZ_2M;
6001 
6002 	while (size_left > 0) {
6003 
6004 		if (size_left < SZ_2M)
6005 			size_to_dma = size_left;
6006 
6007 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6008 						dma_addr);
6009 		if (rc)
6010 			break;
6011 
6012 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6013 
6014 		if (size_left <= SZ_2M)
6015 			break;
6016 
6017 		pos += SZ_2M;
6018 		addr += SZ_2M;
6019 		size_left -= SZ_2M;
6020 	}
6021 
6022 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6023 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6024 	 * ASID
6025 	 */
6026 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6027 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6028 
6029 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6030 
6031 out:
6032 	hdev->asic_funcs->hw_queues_unlock(hdev);
6033 
6034 	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6035 
6036 	return rc;
6037 }
6038 
6039 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6040 {
6041 	struct gaudi_device *gaudi = hdev->asic_specific;
6042 
6043 	if (hdev->reset_info.hard_reset_pending)
6044 		return U64_MAX;
6045 
6046 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6047 			(addr - gaudi->hbm_bar_cur_addr));
6048 }
6049 
6050 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6051 {
6052 	struct gaudi_device *gaudi = hdev->asic_specific;
6053 
6054 	if (hdev->reset_info.hard_reset_pending)
6055 		return;
6056 
6057 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6058 			(addr - gaudi->hbm_bar_cur_addr));
6059 }
6060 
6061 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6062 {
6063 	/* mask to zero the MMBP and ASID bits */
6064 	WREG32_AND(reg, ~0x7FF);
6065 	WREG32_OR(reg, asid);
6066 }
6067 
6068 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6069 {
6070 	struct gaudi_device *gaudi = hdev->asic_specific;
6071 
6072 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6073 		return;
6074 
6075 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6076 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6077 		return;
6078 	}
6079 
6080 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6081 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6082 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6083 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6084 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6085 
6086 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6087 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6088 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6089 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6090 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6091 
6092 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6093 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6094 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6095 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6096 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6097 
6098 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6099 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6100 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6101 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6102 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6103 
6104 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6105 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6106 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6107 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6108 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6109 
6110 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6111 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6112 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6113 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6114 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6115 
6116 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6117 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6118 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6119 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6120 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6121 
6122 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6123 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6124 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6125 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6126 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6127 
6128 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6129 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6130 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6131 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6132 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6133 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6134 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6135 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6136 
6137 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6138 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6139 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6140 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6141 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6142 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6143 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6144 
6145 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6146 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6147 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6148 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6149 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6150 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6151 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6152 
6153 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6154 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6155 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6156 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6157 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6158 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6159 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6160 
6161 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6162 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6163 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6164 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6165 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6166 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6167 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6168 
6169 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6170 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6171 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6172 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6173 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6174 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6175 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6176 
6177 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6178 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6179 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6180 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6181 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6182 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6183 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6184 
6185 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6186 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6187 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6188 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6189 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6190 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6191 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6192 
6193 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6194 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6195 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6196 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6197 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6198 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6199 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6200 
6201 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6202 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6203 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6204 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6205 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6206 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6207 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6208 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6209 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6210 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6211 
6212 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6213 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6214 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6215 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6216 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6217 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6218 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6219 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6220 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6221 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6222 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6223 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6224 
6225 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6226 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6227 				asid);
6228 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6229 				asid);
6230 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6231 				asid);
6232 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6233 				asid);
6234 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6235 				asid);
6236 	}
6237 
6238 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6239 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6240 				asid);
6241 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6242 				asid);
6243 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6244 				asid);
6245 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6246 				asid);
6247 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6248 				asid);
6249 	}
6250 
6251 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6252 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6253 				asid);
6254 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6255 				asid);
6256 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6257 				asid);
6258 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6259 				asid);
6260 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6261 				asid);
6262 	}
6263 
6264 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6265 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6266 				asid);
6267 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6268 				asid);
6269 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6270 				asid);
6271 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6272 				asid);
6273 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6274 				asid);
6275 	}
6276 
6277 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6278 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6279 				asid);
6280 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6281 				asid);
6282 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6283 				asid);
6284 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6285 				asid);
6286 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6287 				asid);
6288 	}
6289 
6290 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6291 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6292 				asid);
6293 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6294 				asid);
6295 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6296 				asid);
6297 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6298 				asid);
6299 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6300 				asid);
6301 	}
6302 
6303 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6304 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6305 				asid);
6306 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6307 				asid);
6308 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6309 				asid);
6310 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6311 				asid);
6312 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6313 				asid);
6314 	}
6315 
6316 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6317 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6318 				asid);
6319 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6320 				asid);
6321 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6322 				asid);
6323 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6324 				asid);
6325 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6326 				asid);
6327 	}
6328 
6329 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6330 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6331 				asid);
6332 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6333 				asid);
6334 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6335 				asid);
6336 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6337 				asid);
6338 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6339 				asid);
6340 	}
6341 
6342 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6343 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6344 				asid);
6345 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6346 				asid);
6347 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6348 				asid);
6349 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6350 				asid);
6351 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6352 				asid);
6353 	}
6354 
6355 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6356 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6357 }
6358 
6359 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6360 		struct hl_cs_job *job)
6361 {
6362 	struct packet_msg_prot *fence_pkt;
6363 	u32 *fence_ptr;
6364 	dma_addr_t fence_dma_addr;
6365 	struct hl_cb *cb;
6366 	u32 tmp, timeout, dma_offset;
6367 	int rc;
6368 
6369 	if (hdev->pldm)
6370 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6371 	else
6372 		timeout = HL_DEVICE_TIMEOUT_USEC;
6373 
6374 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6375 	if (!fence_ptr) {
6376 		dev_err(hdev->dev,
6377 			"Failed to allocate fence memory for QMAN0\n");
6378 		return -ENOMEM;
6379 	}
6380 
6381 	cb = job->patched_cb;
6382 
6383 	fence_pkt = cb->kernel_address +
6384 			job->job_cb_size - sizeof(struct packet_msg_prot);
6385 
6386 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6387 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6388 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6389 
6390 	fence_pkt->ctl = cpu_to_le32(tmp);
6391 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6392 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6393 
6394 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6395 
6396 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6397 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6398 
6399 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6400 					job->job_cb_size, cb->bus_address);
6401 	if (rc) {
6402 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6403 		goto free_fence_ptr;
6404 	}
6405 
6406 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6407 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6408 				timeout, true);
6409 
6410 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6411 
6412 	if (rc == -ETIMEDOUT) {
6413 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6414 		goto free_fence_ptr;
6415 	}
6416 
6417 free_fence_ptr:
6418 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6419 
6420 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6421 	return rc;
6422 }
6423 
6424 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6425 {
6426 	if (event_type >= GAUDI_EVENT_SIZE)
6427 		goto event_not_supported;
6428 
6429 	if (!gaudi_irq_map_table[event_type].valid)
6430 		goto event_not_supported;
6431 
6432 	snprintf(desc, size, gaudi_irq_map_table[event_type].name);
6433 
6434 	return;
6435 
6436 event_not_supported:
6437 	snprintf(desc, size, "N/A");
6438 }
6439 
6440 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6441 							bool is_write, u16 *engine_id_1,
6442 							u16 *engine_id_2)
6443 {
6444 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6445 
6446 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6447 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6448 
6449 	switch (x_y) {
6450 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6451 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6452 		dma_id[0] = 0;
6453 		dma_id[1] = 2;
6454 		break;
6455 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6456 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6457 		dma_id[0] = 1;
6458 		dma_id[1] = 3;
6459 		break;
6460 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6461 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6462 		dma_id[0] = 4;
6463 		dma_id[1] = 6;
6464 		break;
6465 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6466 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6467 		dma_id[0] = 5;
6468 		dma_id[1] = 7;
6469 		break;
6470 	default:
6471 		goto unknown_initiator;
6472 	}
6473 
6474 	for (i = 0 ; i < 2 ; i++) {
6475 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6476 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6477 	}
6478 
6479 	switch (x_y) {
6480 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6481 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6482 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6483 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6484 			return "DMA0";
6485 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6486 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6487 			return "DMA2";
6488 		} else {
6489 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6490 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6491 			return "DMA0 or DMA2";
6492 		}
6493 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6494 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6495 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6496 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6497 			return "DMA1";
6498 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6499 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6500 			return "DMA3";
6501 		} else {
6502 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6503 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6504 			return "DMA1 or DMA3";
6505 		}
6506 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6507 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6508 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6509 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6510 			return "DMA4";
6511 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6512 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6513 			return "DMA6";
6514 		} else {
6515 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6516 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6517 			return "DMA4 or DMA6";
6518 		}
6519 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6520 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6521 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6522 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6523 			return "DMA5";
6524 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6525 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6526 			return "DMA7";
6527 		} else {
6528 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6529 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6530 			return "DMA5 or DMA7";
6531 		}
6532 	}
6533 
6534 unknown_initiator:
6535 	return "unknown initiator";
6536 }
6537 
6538 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6539 							u16 *engine_id_1, u16 *engine_id_2)
6540 {
6541 	u32 val, x_y, axi_id;
6542 
6543 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6544 				RREG32(mmMMU_UP_RAZWI_READ_ID);
6545 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6546 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6547 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6548 			RAZWI_INITIATOR_AXI_ID_SHIFT);
6549 
6550 	switch (x_y) {
6551 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6552 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6553 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6554 			return "TPC0";
6555 		}
6556 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6557 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6558 			return "NIC0";
6559 		}
6560 		break;
6561 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6562 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6563 		return "TPC1";
6564 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6565 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6566 		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6567 		return "MME0";
6568 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6569 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6570 		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6571 		return "MME1";
6572 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6573 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6574 		return "TPC2";
6575 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6576 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6577 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6578 			return "TPC3";
6579 		}
6580 		/* PCI, CPU or PSOC does not have engine id*/
6581 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6582 			return "PCI";
6583 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6584 			return "CPU";
6585 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6586 			return "PSOC";
6587 		break;
6588 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6589 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6590 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6591 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6592 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6593 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6594 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6595 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6596 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6597 				engine_id_1, engine_id_2);
6598 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6599 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6600 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6601 			return "TPC4";
6602 		}
6603 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6604 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6605 			return "NIC1";
6606 		}
6607 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6608 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6609 			return "NIC2";
6610 		}
6611 		break;
6612 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6613 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6614 		return "TPC5";
6615 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6616 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6617 		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6618 		return "MME2";
6619 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6620 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6621 		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6622 		return "MME3";
6623 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6624 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6625 		return "TPC6";
6626 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6627 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6628 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6629 			return "TPC7";
6630 		}
6631 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6632 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6633 			return "NIC4";
6634 		}
6635 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6636 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6637 			return "NIC5";
6638 		}
6639 		break;
6640 	default:
6641 		break;
6642 	}
6643 
6644 	dev_err(hdev->dev,
6645 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6646 		val,
6647 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6648 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6649 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6650 			RAZWI_INITIATOR_AXI_ID_MASK);
6651 
6652 	return "unknown initiator";
6653 }
6654 
6655 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6656 						u16 *engine_id_2, bool *is_read, bool *is_write)
6657 {
6658 
6659 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6660 		dev_err_ratelimited(hdev->dev,
6661 			"RAZWI event caused by illegal write of %s\n",
6662 			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6663 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6664 		*is_write = true;
6665 	}
6666 
6667 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6668 		dev_err_ratelimited(hdev->dev,
6669 			"RAZWI event caused by illegal read of %s\n",
6670 			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6671 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6672 		*is_read = true;
6673 	}
6674 }
6675 
6676 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6677 {
6678 	struct gaudi_device *gaudi = hdev->asic_specific;
6679 	u32 val;
6680 
6681 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6682 		return;
6683 
6684 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6685 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6686 		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6687 		*addr <<= 32;
6688 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6689 
6690 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6691 		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6692 
6693 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6694 	}
6695 
6696 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6697 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6698 		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6699 		*addr <<= 32;
6700 		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6701 
6702 		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6703 
6704 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6705 	}
6706 }
6707 
6708 /*
6709  *  +-------------------+------------------------------------------------------+
6710  *  | Configuration Reg |                     Description                      |
6711  *  |      Address      |                                                      |
6712  *  +-------------------+------------------------------------------------------+
6713  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6714  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6715  *  |                   |0xF34 memory wrappers 63:32                           |
6716  *  |                   |0xF38 memory wrappers 95:64                           |
6717  *  |                   |0xF3C memory wrappers 127:96                          |
6718  *  +-------------------+------------------------------------------------------+
6719  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6720  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6721  *  |                   |0xF44 memory wrappers 63:32                           |
6722  *  |                   |0xF48 memory wrappers 95:64                           |
6723  *  |                   |0xF4C memory wrappers 127:96                          |
6724  *  +-------------------+------------------------------------------------------+
6725  */
6726 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6727 		struct ecc_info_extract_params *params, u64 *ecc_address,
6728 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6729 {
6730 	u32 i, num_mem_regs, reg, err_bit;
6731 	u64 err_addr, err_word = 0;
6732 
6733 	num_mem_regs = params->num_memories / 32 +
6734 			((params->num_memories % 32) ? 1 : 0);
6735 
6736 	if (params->block_address >= CFG_BASE)
6737 		params->block_address -= CFG_BASE;
6738 
6739 	if (params->derr)
6740 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6741 	else
6742 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6743 
6744 	/* Set invalid wrapper index */
6745 	*memory_wrapper_idx = 0xFF;
6746 
6747 	/* Iterate through memory wrappers, a single bit must be set */
6748 	for (i = 0 ; i < num_mem_regs ; i++) {
6749 		err_addr += i * 4;
6750 		err_word = RREG32(err_addr);
6751 		if (err_word) {
6752 			err_bit = __ffs(err_word);
6753 			*memory_wrapper_idx = err_bit + (32 * i);
6754 			break;
6755 		}
6756 	}
6757 
6758 	if (*memory_wrapper_idx == 0xFF) {
6759 		dev_err(hdev->dev, "ECC error information cannot be found\n");
6760 		return -EINVAL;
6761 	}
6762 
6763 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6764 			*memory_wrapper_idx);
6765 
6766 	*ecc_address =
6767 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6768 	*ecc_syndrom =
6769 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6770 
6771 	/* Clear error indication */
6772 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6773 	if (params->derr)
6774 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6775 	else
6776 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6777 
6778 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6779 
6780 	return 0;
6781 }
6782 
/*
 * gaudi_queue_idx_dec - step a queue index (pi/ci) one entry back, wrapping
 *
 * @idx: the current pi/ci value
 * @q_len: the queue length (power of 2)
 *
 * @return the cyclically decremented index
 */
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	/*
	 * Going one entry back in a cyclic queue is the same as going
	 * (q_len - 1) entries forward; masking with (q_len - 1) then folds the
	 * result into the range [0, q_len - 1].
	 */
	u32 prev = idx + q_len - 1;

	return prev & (q_len - 1);
}
6802 
6803 /**
6804  * gaudi_handle_sw_config_stream_data - print SW config stream data
6805  *
6806  * @hdev: pointer to the habanalabs device structure
6807  * @stream: the QMAN's stream
6808  * @qman_base: base address of QMAN registers block
6809  * @event_mask: mask of the last events occurred
6810  */
6811 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6812 						u64 qman_base, u64 event_mask)
6813 {
6814 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6815 	u32 cq_ptr_lo_off, size;
6816 
6817 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6818 
6819 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6820 						stream * cq_ptr_lo_off;
6821 	cq_ptr_hi = cq_ptr_lo +
6822 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6823 	cq_tsize = cq_ptr_lo +
6824 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6825 
6826 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6827 	size = RREG32(cq_tsize);
6828 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6829 							stream, cq_ptr, size);
6830 
6831 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6832 		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6833 		hdev->captured_err_info.undef_opcode.cq_size = size;
6834 		hdev->captured_err_info.undef_opcode.stream_id = stream;
6835 	}
6836 }
6837 
6838 /**
6839  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6840  *
6841  * @hdev: pointer to the habanalabs device structure
6842  * @qid_base: first QID of the QMAN (out of 4 streams)
6843  * @stream: the QMAN's stream
6844  * @qman_base: base address of QMAN registers block
6845  * @event_mask: mask of the last events occurred
6846  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6847  */
6848 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6849 						u32 stream, u64 qman_base,
6850 						u64 event_mask,
6851 						bool pr_sw_conf)
6852 {
6853 	u32 ci, qm_ci_stream_off, queue_len;
6854 	struct hl_hw_queue *q;
6855 	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6856 	int i;
6857 
6858 	q = &hdev->kernel_queues[qid_base + stream];
6859 
6860 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6861 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6862 						stream * qm_ci_stream_off;
6863 
6864 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6865 					q->int_queue_len : HL_QUEUE_LENGTH;
6866 
6867 	hdev->asic_funcs->hw_queues_lock(hdev);
6868 
6869 	if (pr_sw_conf)
6870 		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6871 
6872 	ci = RREG32(pq_ci);
6873 
6874 	/* we should start printing form ci -1 */
6875 	ci = gaudi_queue_idx_dec(ci, queue_len);
6876 	memset(addr, 0, sizeof(addr));
6877 
6878 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6879 		struct hl_bd *bd;
6880 		u32 len;
6881 
6882 		bd = q->kernel_address;
6883 		bd += ci;
6884 
6885 		len = le32_to_cpu(bd->len);
6886 		/* len 0 means uninitialized entry- break */
6887 		if (!len)
6888 			break;
6889 
6890 		addr[i] = le64_to_cpu(bd->ptr);
6891 
6892 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6893 							stream, ci, addr[i], len);
6894 
6895 		/* get previous ci, wrap if needed */
6896 		ci = gaudi_queue_idx_dec(ci, queue_len);
6897 	}
6898 
6899 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6900 		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6901 		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6902 
6903 		if (arr_idx == 0) {
6904 			undef_opcode->timestamp = ktime_get();
6905 			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6906 		}
6907 
6908 		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6909 		undef_opcode->cb_addr_streams_len++;
6910 	}
6911 
6912 	hdev->asic_funcs->hw_queues_unlock(hdev);
6913 }
6914 
6915 /**
6916  * handle_qman_data_on_err - extract QMAN data on error
6917  *
6918  * @hdev: pointer to the habanalabs device structure
6919  * @qid_base: first QID of the QMAN (out of 4 streams)
6920  * @stream: the QMAN's stream
6921  * @qman_base: base address of QMAN registers block
6922  * @event_mask: mask of the last events occurred
6923  *
6924  * This function attempt to exatract as much data as possible on QMAN error.
6925  * On upper CP print the SW config stream data and last 8 PQEs.
6926  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6927  */
6928 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6929 				   u32 stream, u64 qman_base, u64 event_mask)
6930 {
6931 	u32 i;
6932 
6933 	if (stream != QMAN_STREAMS) {
6934 		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6935 			qman_base, event_mask, true);
6936 		return;
6937 	}
6938 
6939 	/* handle Lower-CP */
6940 	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6941 
6942 	for (i = 0; i < QMAN_STREAMS; i++)
6943 		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6944 			qman_base, event_mask, false);
6945 }
6946 
6947 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6948 					  const char *qm_name,
6949 					  u64 qman_base,
6950 					  u32 qid_base,
6951 					  u64 *event_mask)
6952 {
6953 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6954 	u64 glbl_sts_addr, arb_err_addr;
6955 	char reg_desc[32];
6956 
6957 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6958 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6959 
6960 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
6961 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6962 		glbl_sts_clr_val = 0;
6963 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6964 
6965 		if (!glbl_sts_val)
6966 			continue;
6967 
6968 		if (i == QMAN_STREAMS)
6969 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6970 		else
6971 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6972 
6973 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6974 			if (glbl_sts_val & BIT(j)) {
6975 				dev_err_ratelimited(hdev->dev,
6976 						"%s %s. err cause: %s\n",
6977 						qm_name, reg_desc,
6978 						gaudi_qman_error_cause[j]);
6979 				glbl_sts_clr_val |= BIT(j);
6980 			}
6981 		}
6982 		/* check for undefined opcode */
6983 		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6984 				hdev->captured_err_info.undef_opcode.write_enable) {
6985 			memset(&hdev->captured_err_info.undef_opcode, 0,
6986 						sizeof(hdev->captured_err_info.undef_opcode));
6987 
6988 			hdev->captured_err_info.undef_opcode.write_enable = false;
6989 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6990 		}
6991 
6992 		/* Write 1 clear errors */
6993 		if (!hdev->stop_on_err)
6994 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6995 		else
6996 			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6997 	}
6998 
6999 	arb_err_val = RREG32(arb_err_addr);
7000 
7001 	if (!arb_err_val)
7002 		return;
7003 
7004 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7005 		if (arb_err_val & BIT(j)) {
7006 			dev_err_ratelimited(hdev->dev,
7007 					"%s ARB_ERR. err cause: %s\n",
7008 					qm_name,
7009 					gaudi_qman_arb_error_cause[j]);
7010 		}
7011 	}
7012 }
7013 
7014 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7015 		struct hl_eq_sm_sei_data *sei_data)
7016 {
7017 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7018 
7019 	/* Flip the bits as the enum is ordered in the opposite way */
7020 	index = (index ^ 0x3) & 0x3;
7021 
7022 	switch (sei_data->sei_cause) {
7023 	case SM_SEI_SO_OVERFLOW:
7024 		dev_err_ratelimited(hdev->dev,
7025 			"%s SEI Error: SOB Group %u overflow/underflow",
7026 			gaudi_sync_manager_names[index],
7027 			le32_to_cpu(sei_data->sei_log));
7028 		break;
7029 	case SM_SEI_LBW_4B_UNALIGNED:
7030 		dev_err_ratelimited(hdev->dev,
7031 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7032 			gaudi_sync_manager_names[index],
7033 			le32_to_cpu(sei_data->sei_log));
7034 		break;
7035 	case SM_SEI_AXI_RESPONSE_ERR:
7036 		dev_err_ratelimited(hdev->dev,
7037 			"%s SEI Error: AXI ID %u response error",
7038 			gaudi_sync_manager_names[index],
7039 			le32_to_cpu(sei_data->sei_log));
7040 		break;
7041 	default:
7042 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7043 				le32_to_cpu(sei_data->sei_log));
7044 		break;
7045 	}
7046 }
7047 
7048 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7049 		struct hl_eq_ecc_data *ecc_data)
7050 {
7051 	struct ecc_info_extract_params params;
7052 	u64 ecc_address = 0, ecc_syndrom = 0;
7053 	u8 index, memory_wrapper_idx = 0;
7054 	bool extract_info_from_fw;
7055 	int rc;
7056 
7057 	if (hdev->asic_prop.fw_security_enabled) {
7058 		extract_info_from_fw = true;
7059 		goto extract_ecc_info;
7060 	}
7061 
7062 	switch (event_type) {
7063 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7064 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7065 		extract_info_from_fw = true;
7066 		break;
7067 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7068 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7069 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7070 		params.num_memories = 90;
7071 		params.derr = false;
7072 		extract_info_from_fw = false;
7073 		break;
7074 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7075 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7076 		params.block_address =
7077 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7078 		params.num_memories = 90;
7079 		params.derr = true;
7080 		extract_info_from_fw = false;
7081 		break;
7082 	case GAUDI_EVENT_MME0_ACC_SERR:
7083 	case GAUDI_EVENT_MME1_ACC_SERR:
7084 	case GAUDI_EVENT_MME2_ACC_SERR:
7085 	case GAUDI_EVENT_MME3_ACC_SERR:
7086 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7087 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7088 		params.num_memories = 128;
7089 		params.derr = false;
7090 		extract_info_from_fw = false;
7091 		break;
7092 	case GAUDI_EVENT_MME0_ACC_DERR:
7093 	case GAUDI_EVENT_MME1_ACC_DERR:
7094 	case GAUDI_EVENT_MME2_ACC_DERR:
7095 	case GAUDI_EVENT_MME3_ACC_DERR:
7096 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7097 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7098 		params.num_memories = 128;
7099 		params.derr = true;
7100 		extract_info_from_fw = false;
7101 		break;
7102 	case GAUDI_EVENT_MME0_SBAB_SERR:
7103 	case GAUDI_EVENT_MME1_SBAB_SERR:
7104 	case GAUDI_EVENT_MME2_SBAB_SERR:
7105 	case GAUDI_EVENT_MME3_SBAB_SERR:
7106 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7107 		params.block_address =
7108 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7109 		params.num_memories = 33;
7110 		params.derr = false;
7111 		extract_info_from_fw = false;
7112 		break;
7113 	case GAUDI_EVENT_MME0_SBAB_DERR:
7114 	case GAUDI_EVENT_MME1_SBAB_DERR:
7115 	case GAUDI_EVENT_MME2_SBAB_DERR:
7116 	case GAUDI_EVENT_MME3_SBAB_DERR:
7117 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7118 		params.block_address =
7119 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7120 		params.num_memories = 33;
7121 		params.derr = true;
7122 		extract_info_from_fw = false;
7123 		break;
7124 	default:
7125 		return;
7126 	}
7127 
7128 extract_ecc_info:
7129 	if (extract_info_from_fw) {
7130 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7131 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7132 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7133 	} else {
7134 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7135 				&ecc_syndrom, &memory_wrapper_idx);
7136 		if (rc)
7137 			return;
7138 	}
7139 
7140 	dev_err(hdev->dev,
7141 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7142 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7143 }
7144 
7145 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7146 {
7147 	u64 qman_base;
7148 	char desc[32];
7149 	u32 qid_base;
7150 	u8 index;
7151 
7152 	switch (event_type) {
7153 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7154 		index = event_type - GAUDI_EVENT_TPC0_QM;
7155 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7156 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7157 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7158 		break;
7159 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7160 		if (event_type == GAUDI_EVENT_MME0_QM) {
7161 			index = 0;
7162 			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7163 		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7164 			index = 2;
7165 			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7166 		}
7167 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7168 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7169 		break;
7170 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7171 		index = event_type - GAUDI_EVENT_DMA0_QM;
7172 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7173 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7174 		if (index > 1)
7175 			qid_base++;
7176 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7177 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7178 		break;
7179 	case GAUDI_EVENT_NIC0_QM0:
7180 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7181 		qman_base = mmNIC0_QM0_BASE;
7182 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7183 		break;
7184 	case GAUDI_EVENT_NIC0_QM1:
7185 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7186 		qman_base = mmNIC0_QM1_BASE;
7187 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7188 		break;
7189 	case GAUDI_EVENT_NIC1_QM0:
7190 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7191 		qman_base = mmNIC1_QM0_BASE;
7192 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7193 		break;
7194 	case GAUDI_EVENT_NIC1_QM1:
7195 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7196 		qman_base = mmNIC1_QM1_BASE;
7197 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7198 		break;
7199 	case GAUDI_EVENT_NIC2_QM0:
7200 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7201 		qman_base = mmNIC2_QM0_BASE;
7202 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7203 		break;
7204 	case GAUDI_EVENT_NIC2_QM1:
7205 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7206 		qman_base = mmNIC2_QM1_BASE;
7207 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7208 		break;
7209 	case GAUDI_EVENT_NIC3_QM0:
7210 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7211 		qman_base = mmNIC3_QM0_BASE;
7212 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7213 		break;
7214 	case GAUDI_EVENT_NIC3_QM1:
7215 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7216 		qman_base = mmNIC3_QM1_BASE;
7217 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7218 		break;
7219 	case GAUDI_EVENT_NIC4_QM0:
7220 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7221 		qman_base = mmNIC4_QM0_BASE;
7222 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7223 		break;
7224 	case GAUDI_EVENT_NIC4_QM1:
7225 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7226 		qman_base = mmNIC4_QM1_BASE;
7227 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7228 		break;
7229 	default:
7230 		return;
7231 	}
7232 
7233 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7234 }
7235 
7236 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7237 					bool check_razwi, u64 *event_mask)
7238 {
7239 	bool is_read = false, is_write = false;
7240 	u16 engine_id[2], num_of_razwi_eng = 0;
7241 	char desc[64] = "";
7242 	u64 razwi_addr = 0;
7243 	u8 razwi_flags = 0;
7244 
7245 	/*
7246 	 * Init engine id by default as not valid and only if razwi initiated from engine with
7247 	 * engine id it will get valid value.
7248 	 */
7249 	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7250 	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7251 
7252 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7253 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7254 		event_type, desc);
7255 
7256 	if (check_razwi) {
7257 		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7258 						&is_write);
7259 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7260 
7261 		if (is_read)
7262 			razwi_flags |= HL_RAZWI_READ;
7263 		if (is_write)
7264 			razwi_flags |= HL_RAZWI_WRITE;
7265 
7266 		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7267 			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7268 				num_of_razwi_eng = 2;
7269 			else
7270 				num_of_razwi_eng = 1;
7271 		}
7272 
7273 		if (razwi_flags)
7274 			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7275 					razwi_flags, event_mask);
7276 	}
7277 }
7278 
7279 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7280 					struct cpucp_pkt_sync_err *sync_err)
7281 {
7282 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7283 
7284 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7285 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7286 }
7287 
7288 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7289 					struct hl_eq_fw_alive *fw_alive)
7290 {
7291 	dev_err(hdev->dev,
7292 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7293 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7294 		le32_to_cpu(fw_alive->process_id),
7295 		le32_to_cpu(fw_alive->thread_id),
7296 		le64_to_cpu(fw_alive->uptime_seconds));
7297 }
7298 
7299 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7300 						void *data)
7301 {
7302 	char desc[64] = "", *type;
7303 	struct eq_nic_sei_event *eq_nic_sei = data;
7304 	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7305 
7306 	switch (eq_nic_sei->axi_error_cause) {
7307 	case RXB:
7308 		type = "RXB";
7309 		break;
7310 	case RXE:
7311 		type = "RXE";
7312 		break;
7313 	case TXS:
7314 		type = "TXS";
7315 		break;
7316 	case TXE:
7317 		type = "TXE";
7318 		break;
7319 	case QPC_RESP:
7320 		type = "QPC_RESP";
7321 		break;
7322 	case NON_AXI_ERR:
7323 		type = "NON_AXI_ERR";
7324 		break;
7325 	case TMR:
7326 		type = "TMR";
7327 		break;
7328 	default:
7329 		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7330 			eq_nic_sei->axi_error_cause);
7331 		type = "N/A";
7332 		break;
7333 	}
7334 
7335 	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7336 			eq_nic_sei->id);
7337 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7338 		event_type, desc);
7339 }
7340 
/*
 * gaudi_compute_reset_late_init() - late-init hook for a compute reset.
 * @hdev: habanalabs device structure (not used).
 *
 * Hard-reset is the only reset GAUDI supports, so this hook always fails.
 *
 * Return: -EPERM, unconditionally.
 */
static int gaudi_compute_reset_late_init(struct hl_device *hdev)
{
	return -EPERM;
}
7346 
7347 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7348 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7349 {
7350 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7351 	int rc = 0;
7352 
7353 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7354 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7355 		if (!hbm_ecc_data) {
7356 			dev_err(hdev->dev, "No FW ECC data");
7357 			return 0;
7358 		}
7359 
7360 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7361 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7362 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7363 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7364 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7365 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7366 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7367 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7368 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7369 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7370 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7371 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7372 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7373 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7374 
7375 		dev_err(hdev->dev,
7376 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7377 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7378 		dev_err(hdev->dev,
7379 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7380 			device, ch, hbm_ecc_data->first_addr, type,
7381 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7382 			hbm_ecc_data->dec_cnt);
7383 		return 0;
7384 	}
7385 
7386 	if (hdev->asic_prop.fw_security_enabled) {
7387 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7388 		return 0;
7389 	}
7390 
7391 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7392 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7393 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7394 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7395 		if (val) {
7396 			rc = -EIO;
7397 			dev_err(hdev->dev,
7398 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7399 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7400 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7401 				(val >> 4) & 0x1);
7402 
7403 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7404 			dev_err(hdev->dev,
7405 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7406 				device, ch * 2,
7407 				RREG32(base + ch * 0x1000 + 0x064),
7408 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7409 				(val2 & 0xFF0000) >> 16,
7410 				(val2 & 0xFF000000) >> 24);
7411 		}
7412 
7413 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7414 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7415 		if (val) {
7416 			rc = -EIO;
7417 			dev_err(hdev->dev,
7418 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7419 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7420 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7421 				(val >> 4) & 0x1);
7422 
7423 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7424 			dev_err(hdev->dev,
7425 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7426 				device, ch * 2 + 1,
7427 				RREG32(base + ch * 0x1000 + 0x074),
7428 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7429 				(val2 & 0xFF0000) >> 16,
7430 				(val2 & 0xFF000000) >> 24);
7431 		}
7432 
7433 		/* Clear interrupts */
7434 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7435 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7436 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7437 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7438 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7439 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7440 	}
7441 
7442 	val  = RREG32(base + 0x8F30);
7443 	val2 = RREG32(base + 0x8F34);
7444 	if (val | val2) {
7445 		rc = -EIO;
7446 		dev_err(hdev->dev,
7447 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7448 			device, val, val2);
7449 	}
7450 	val  = RREG32(base + 0x8F40);
7451 	val2 = RREG32(base + 0x8F44);
7452 	if (val | val2) {
7453 		rc = -EIO;
7454 		dev_err(hdev->dev,
7455 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7456 			device, val, val2);
7457 	}
7458 
7459 	return rc;
7460 }
7461 
7462 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7463 {
7464 	switch (hbm_event_type) {
7465 	case GAUDI_EVENT_HBM0_SPI_0:
7466 	case GAUDI_EVENT_HBM0_SPI_1:
7467 		return 0;
7468 	case GAUDI_EVENT_HBM1_SPI_0:
7469 	case GAUDI_EVENT_HBM1_SPI_1:
7470 		return 1;
7471 	case GAUDI_EVENT_HBM2_SPI_0:
7472 	case GAUDI_EVENT_HBM2_SPI_1:
7473 		return 2;
7474 	case GAUDI_EVENT_HBM3_SPI_0:
7475 	case GAUDI_EVENT_HBM3_SPI_1:
7476 		return 3;
7477 	default:
7478 		break;
7479 	}
7480 
7481 	/* Should never happen */
7482 	return 0;
7483 }
7484 
7485 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7486 					char *interrupt_name)
7487 {
7488 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7489 	bool soft_reset_required = false;
7490 
7491 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7492 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7493 
7494 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7495 		if (tpc_interrupts_cause & BIT(i)) {
7496 			dev_err_ratelimited(hdev->dev,
7497 					"TPC%d_%s interrupt cause: %s\n",
7498 					tpc_id, interrupt_name,
7499 					gaudi_tpc_interrupts_cause[i]);
7500 			/* If this is QM error, we need to soft-reset */
7501 			if (i == 15)
7502 				soft_reset_required = true;
7503 		}
7504 
7505 	/* Clear interrupts */
7506 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7507 
7508 	return soft_reset_required;
7509 }
7510 
7511 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7512 {
7513 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7514 }
7515 
7516 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7517 {
7518 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7519 }
7520 
7521 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7522 {
7523 	ktime_t zero_time = ktime_set(0, 0);
7524 
7525 	mutex_lock(&hdev->clk_throttling.lock);
7526 
7527 	switch (event_type) {
7528 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7529 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7530 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7531 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7532 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7533 		dev_info_ratelimited(hdev->dev,
7534 			"Clock throttling due to power consumption\n");
7535 		break;
7536 
7537 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7538 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7539 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7540 		dev_info_ratelimited(hdev->dev,
7541 			"Power envelop is safe, back to optimal clock\n");
7542 		break;
7543 
7544 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7545 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7546 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7547 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7548 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7549 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7550 		dev_info_ratelimited(hdev->dev,
7551 			"Clock throttling due to overheating\n");
7552 		break;
7553 
7554 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7555 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7556 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7557 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7558 		dev_info_ratelimited(hdev->dev,
7559 			"Thermal envelop is safe, back to optimal clock\n");
7560 		break;
7561 
7562 	default:
7563 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7564 			event_type);
7565 		break;
7566 	}
7567 
7568 	mutex_unlock(&hdev->clk_throttling.lock);
7569 }
7570 
7571 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7572 {
7573 	struct gaudi_device *gaudi = hdev->asic_specific;
7574 	struct hl_info_fw_err_info fw_err_info;
7575 	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7576 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7577 	u32 fw_fatal_err_flag = 0, flags = 0;
7578 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7579 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7580 	bool reset_required, reset_direct = false;
7581 	u8 cause;
7582 	int rc;
7583 
7584 	if (event_type >= GAUDI_EVENT_SIZE) {
7585 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7586 				event_type, GAUDI_EVENT_SIZE - 1);
7587 		return;
7588 	}
7589 
7590 	gaudi->events_stat[event_type]++;
7591 	gaudi->events_stat_aggregate[event_type]++;
7592 
7593 	switch (event_type) {
7594 	case GAUDI_EVENT_PCIE_CORE_DERR:
7595 	case GAUDI_EVENT_PCIE_IF_DERR:
7596 	case GAUDI_EVENT_PCIE_PHY_DERR:
7597 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7598 	case GAUDI_EVENT_MME0_ACC_DERR:
7599 	case GAUDI_EVENT_MME0_SBAB_DERR:
7600 	case GAUDI_EVENT_MME1_ACC_DERR:
7601 	case GAUDI_EVENT_MME1_SBAB_DERR:
7602 	case GAUDI_EVENT_MME2_ACC_DERR:
7603 	case GAUDI_EVENT_MME2_SBAB_DERR:
7604 	case GAUDI_EVENT_MME3_ACC_DERR:
7605 	case GAUDI_EVENT_MME3_SBAB_DERR:
7606 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7607 		fallthrough;
7608 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7609 	case GAUDI_EVENT_PSOC_MEM_DERR:
7610 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7611 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7612 	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7613 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7614 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7615 	case GAUDI_EVENT_MMU_DERR:
7616 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7617 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7618 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7619 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7620 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7621 		goto reset_device;
7622 
7623 	case GAUDI_EVENT_GIC500:
7624 	case GAUDI_EVENT_AXI_ECC:
7625 	case GAUDI_EVENT_L2_RAM_ECC:
7626 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7627 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7628 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7629 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7630 		goto reset_device;
7631 
7632 	case GAUDI_EVENT_HBM0_SPI_0:
7633 	case GAUDI_EVENT_HBM1_SPI_0:
7634 	case GAUDI_EVENT_HBM2_SPI_0:
7635 	case GAUDI_EVENT_HBM3_SPI_0:
7636 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7637 		gaudi_hbm_read_interrupts(hdev,
7638 				gaudi_hbm_event_to_dev(event_type),
7639 				&eq_entry->hbm_ecc_data);
7640 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7641 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7642 		goto reset_device;
7643 
7644 	case GAUDI_EVENT_HBM0_SPI_1:
7645 	case GAUDI_EVENT_HBM1_SPI_1:
7646 	case GAUDI_EVENT_HBM2_SPI_1:
7647 	case GAUDI_EVENT_HBM3_SPI_1:
7648 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7649 		gaudi_hbm_read_interrupts(hdev,
7650 				gaudi_hbm_event_to_dev(event_type),
7651 				&eq_entry->hbm_ecc_data);
7652 		hl_fw_unmask_irq(hdev, event_type);
7653 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7654 		break;
7655 
7656 	case GAUDI_EVENT_TPC0_DEC:
7657 	case GAUDI_EVENT_TPC1_DEC:
7658 	case GAUDI_EVENT_TPC2_DEC:
7659 	case GAUDI_EVENT_TPC3_DEC:
7660 	case GAUDI_EVENT_TPC4_DEC:
7661 	case GAUDI_EVENT_TPC5_DEC:
7662 	case GAUDI_EVENT_TPC6_DEC:
7663 	case GAUDI_EVENT_TPC7_DEC:
7664 		/* In TPC DEC event, notify on TPC assertion. While there isn't
7665 		 * a specific event for assertion yet, the FW generates TPC DEC event.
7666 		 * The SW upper layer will inspect an internal mapped area to indicate
7667 		 * if the event is a TPC Assertion or a "real" TPC DEC.
7668 		 */
7669 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7670 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7671 		reset_required = gaudi_tpc_read_interrupts(hdev,
7672 					tpc_dec_event_to_tpc_id(event_type),
7673 					"AXI_SLV_DEC_Error");
7674 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7675 		if (reset_required) {
7676 			dev_err(hdev->dev, "reset required due to %s\n",
7677 				gaudi_irq_map_table[event_type].name);
7678 
7679 			reset_direct = true;
7680 			goto reset_device;
7681 		} else {
7682 			hl_fw_unmask_irq(hdev, event_type);
7683 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7684 		}
7685 		break;
7686 
7687 	case GAUDI_EVENT_TPC0_KRN_ERR:
7688 	case GAUDI_EVENT_TPC1_KRN_ERR:
7689 	case GAUDI_EVENT_TPC2_KRN_ERR:
7690 	case GAUDI_EVENT_TPC3_KRN_ERR:
7691 	case GAUDI_EVENT_TPC4_KRN_ERR:
7692 	case GAUDI_EVENT_TPC5_KRN_ERR:
7693 	case GAUDI_EVENT_TPC6_KRN_ERR:
7694 	case GAUDI_EVENT_TPC7_KRN_ERR:
7695 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7696 		reset_required = gaudi_tpc_read_interrupts(hdev,
7697 					tpc_krn_event_to_tpc_id(event_type),
7698 					"KRN_ERR");
7699 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7700 		if (reset_required) {
7701 			dev_err(hdev->dev, "reset required due to %s\n",
7702 				gaudi_irq_map_table[event_type].name);
7703 
7704 			reset_direct = true;
7705 			goto reset_device;
7706 		} else {
7707 			hl_fw_unmask_irq(hdev, event_type);
7708 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7709 		}
7710 		break;
7711 
7712 	case GAUDI_EVENT_PCIE_CORE_SERR:
7713 	case GAUDI_EVENT_PCIE_IF_SERR:
7714 	case GAUDI_EVENT_PCIE_PHY_SERR:
7715 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7716 	case GAUDI_EVENT_MME0_ACC_SERR:
7717 	case GAUDI_EVENT_MME0_SBAB_SERR:
7718 	case GAUDI_EVENT_MME1_ACC_SERR:
7719 	case GAUDI_EVENT_MME1_SBAB_SERR:
7720 	case GAUDI_EVENT_MME2_ACC_SERR:
7721 	case GAUDI_EVENT_MME2_SBAB_SERR:
7722 	case GAUDI_EVENT_MME3_ACC_SERR:
7723 	case GAUDI_EVENT_MME3_SBAB_SERR:
7724 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7725 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7726 	case GAUDI_EVENT_PSOC_MEM_SERR:
7727 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7728 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7729 	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7730 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7731 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7732 		fallthrough;
7733 	case GAUDI_EVENT_MMU_SERR:
7734 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7735 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7736 		hl_fw_unmask_irq(hdev, event_type);
7737 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7738 		break;
7739 
7740 	case GAUDI_EVENT_PCIE_DEC:
7741 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7742 	case GAUDI_EVENT_PSOC_AXI_DEC:
7743 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7744 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7745 		hl_fw_unmask_irq(hdev, event_type);
7746 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7747 		break;
7748 
7749 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7750 	case GAUDI_EVENT_MMU_WR_PERM:
7751 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7752 		hl_fw_unmask_irq(hdev, event_type);
7753 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7754 		break;
7755 
7756 	case GAUDI_EVENT_MME0_WBC_RSP:
7757 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7758 	case GAUDI_EVENT_MME1_WBC_RSP:
7759 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7760 	case GAUDI_EVENT_MME2_WBC_RSP:
7761 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7762 	case GAUDI_EVENT_MME3_WBC_RSP:
7763 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7764 	case GAUDI_EVENT_RAZWI_OR_ADC:
7765 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7766 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7767 		fallthrough;
7768 	case GAUDI_EVENT_NIC0_QM0:
7769 	case GAUDI_EVENT_NIC0_QM1:
7770 	case GAUDI_EVENT_NIC1_QM0:
7771 	case GAUDI_EVENT_NIC1_QM1:
7772 	case GAUDI_EVENT_NIC2_QM0:
7773 	case GAUDI_EVENT_NIC2_QM1:
7774 	case GAUDI_EVENT_NIC3_QM0:
7775 	case GAUDI_EVENT_NIC3_QM1:
7776 	case GAUDI_EVENT_NIC4_QM0:
7777 	case GAUDI_EVENT_NIC4_QM1:
7778 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7779 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7780 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7781 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7782 		hl_fw_unmask_irq(hdev, event_type);
7783 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7784 		break;
7785 
7786 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7787 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7788 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7789 		goto reset_device;
7790 
7791 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7792 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7793 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7794 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7795 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7796 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7797 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7798 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7799 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7800 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7801 		hl_fw_unmask_irq(hdev, event_type);
7802 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7803 		break;
7804 
7805 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7806 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7807 		hl_fw_unmask_irq(hdev, event_type);
7808 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7809 		break;
7810 
7811 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7812 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7813 		gaudi_print_sm_sei_info(hdev, event_type,
7814 					&eq_entry->sm_sei_data);
7815 		rc = hl_state_dump(hdev);
7816 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7817 		if (rc)
7818 			dev_err(hdev->dev,
7819 				"Error during system state dump %d\n", rc);
7820 		hl_fw_unmask_irq(hdev, event_type);
7821 		break;
7822 
7823 	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7824 		break;
7825 
7826 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7827 		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7828 		hl_fw_unmask_irq(hdev, event_type);
7829 		break;
7830 
7831 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7832 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7833 		dev_err(hdev->dev,
7834 			"Received high temp H/W interrupt %d (cause %d)\n",
7835 			event_type, cause);
7836 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7837 		break;
7838 
7839 	case GAUDI_EVENT_DEV_RESET_REQ:
7840 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7841 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7842 		goto reset_device;
7843 
7844 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7845 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7846 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7847 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7848 		goto reset_device;
7849 
7850 	case GAUDI_EVENT_FW_ALIVE_S:
7851 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7852 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7853 		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7854 		fw_err_info.event_id = event_type;
7855 		fw_err_info.event_mask = &event_mask;
7856 		hl_handle_fw_err(hdev, &fw_err_info);
7857 		goto reset_device;
7858 
7859 	default:
7860 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7861 				event_type);
7862 		break;
7863 	}
7864 
7865 	if (event_mask)
7866 		hl_notifier_event_send_all(hdev, event_mask);
7867 
7868 	return;
7869 
7870 reset_device:
7871 	reset_required = true;
7872 
7873 	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7874 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7875 
7876 		/* notify on device unavailable while the reset triggered by fw */
7877 		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7878 					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7879 	} else if (hdev->hard_reset_on_fw_events) {
7880 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7881 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7882 	} else {
7883 		reset_required = false;
7884 	}
7885 
7886 	if (reset_required) {
7887 		/* escalate general hw errors to critical/fatal error */
7888 		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7889 			hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7890 
7891 		hl_device_cond_reset(hdev, flags, event_mask);
7892 	} else {
7893 		hl_fw_unmask_irq(hdev, event_type);
7894 		/* Notification on occurred event needs to be sent although reset is not executed */
7895 		if (event_mask)
7896 			hl_notifier_event_send_all(hdev, event_mask);
7897 	}
7898 }
7899 
7900 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7901 {
7902 	struct gaudi_device *gaudi = hdev->asic_specific;
7903 
7904 	if (aggregate) {
7905 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7906 		return gaudi->events_stat_aggregate;
7907 	}
7908 
7909 	*size = (u32) sizeof(gaudi->events_stat);
7910 	return gaudi->events_stat;
7911 }
7912 
7913 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7914 {
7915 	struct gaudi_device *gaudi = hdev->asic_specific;
7916 	u32 status, timeout_usec;
7917 	int rc;
7918 
7919 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7920 		hdev->reset_info.hard_reset_pending)
7921 		return 0;
7922 
7923 	if (hdev->pldm)
7924 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7925 	else
7926 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7927 
7928 	/* L0 & L1 invalidation */
7929 	WREG32(mmSTLB_INV_PS, 3);
7930 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7931 	WREG32(mmSTLB_INV_PS, 2);
7932 
7933 	rc = hl_poll_timeout(
7934 		hdev,
7935 		mmSTLB_INV_PS,
7936 		status,
7937 		!status,
7938 		1000,
7939 		timeout_usec);
7940 
7941 	WREG32(mmSTLB_INV_SET, 0);
7942 
7943 	return rc;
7944 }
7945 
7946 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7947 						bool is_hard, u32 flags,
7948 						u32 asid, u64 va, u64 size)
7949 {
7950 	/* Treat as invalidate all because there is no range invalidation
7951 	 * in Gaudi
7952 	 */
7953 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7954 }
7955 
7956 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7957 {
7958 	u32 status, timeout_usec;
7959 	int rc;
7960 
7961 	if (hdev->pldm)
7962 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7963 	else
7964 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7965 
7966 	WREG32(MMU_ASID, asid);
7967 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7968 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7969 	WREG32(MMU_BUSY, 0x80000000);
7970 
7971 	rc = hl_poll_timeout(
7972 		hdev,
7973 		MMU_BUSY,
7974 		status,
7975 		!(status & 0x80000000),
7976 		1000,
7977 		timeout_usec);
7978 
7979 	if (rc) {
7980 		dev_err(hdev->dev,
7981 			"Timeout during MMU hop0 config of asid %d\n", asid);
7982 		return rc;
7983 	}
7984 
7985 	return 0;
7986 }
7987 
7988 static int gaudi_send_heartbeat(struct hl_device *hdev)
7989 {
7990 	struct gaudi_device *gaudi = hdev->asic_specific;
7991 
7992 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7993 		return 0;
7994 
7995 	return hl_fw_send_heartbeat(hdev);
7996 }
7997 
7998 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7999 {
8000 	struct gaudi_device *gaudi = hdev->asic_specific;
8001 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8002 	int rc;
8003 
8004 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8005 		return 0;
8006 
8007 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8008 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8009 					mmCPU_BOOT_ERR1);
8010 	if (rc)
8011 		return rc;
8012 
8013 	if (!strlen(prop->cpucp_info.card_name))
8014 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8015 				CARD_NAME_MAX_LEN);
8016 
8017 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8018 
8019 	set_default_power_values(hdev);
8020 
8021 	return 0;
8022 }
8023 
/*
 * gaudi_is_device_idle() - check whether all DMA, TPC, MME and NIC engines
 *                          are idle.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; when non-NULL, the bit of every busy engine
 *            (indexed by its GAUDI_ENGINE_ID_*) is set in it.
 * @mask_len: not used by this function.
 * @e: optional output buffer; when non-NULL, a status table with one row per
 *     engine is appended to it.
 *
 * Each engine family is scanned in turn: its status registers are read, the
 * IS_*_IDLE() macros decide whether the engine is idle, and the result is
 * AND-ed into the overall answer.
 *
 * Return: true when every checked engine is idle, false otherwise.
 */
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	/* Row formats of the per-engine tables printed into @e */
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	if (e)
		hl_engine_data_sprintf(e,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	/* DMA channels: idle when both the QMAN and the DMA core are idle */
	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		/* Visit the channels in gaudi_dma_assignment[] order */
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	/* TPC engines: idle when both the QMAN and the TPC config status are idle */
	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	/* MME engines: the arch status is always checked, the QMAN only on masters */
	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (e) {
			/* Slave rows print "-" in place of the QMAN registers */
			if (!is_slave)
				hl_engine_data_sprintf(e, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				hl_engine_data_sprintf(e, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e,
				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
				"---  -------  ------------  ----------\n");

	/*
	 * NIC engines: each iteration covers one NIC macro, i.e. two ports
	 * (QM0 and QM1). A port is checked only when its capability bit is set
	 * in hw_cap_initialized.
	 */
	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e, "\n");

	return is_idle;
}
8160 
8161 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8162 	__acquires(&gaudi->hw_queues_lock)
8163 {
8164 	struct gaudi_device *gaudi = hdev->asic_specific;
8165 
8166 	spin_lock(&gaudi->hw_queues_lock);
8167 }
8168 
8169 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8170 	__releases(&gaudi->hw_queues_lock)
8171 {
8172 	struct gaudi_device *gaudi = hdev->asic_specific;
8173 
8174 	spin_unlock(&gaudi->hw_queues_lock);
8175 }
8176 
8177 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8178 {
8179 	return hdev->pdev->device;
8180 }
8181 
8182 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8183 				size_t max_size)
8184 {
8185 	struct gaudi_device *gaudi = hdev->asic_specific;
8186 
8187 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8188 		return 0;
8189 
8190 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8191 }
8192 
8193 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8194 {
8195 	struct gaudi_device *gaudi = hdev->asic_specific;
8196 
8197 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8198 		return 0;
8199 
8200 	return hl_fw_get_monitor_dump(hdev, data);
8201 }
8202 
/*
 * gaudi_run_tpc_kernel() - program a single TPC with a kernel, start it and
 * wait for it to finish.
 * @hdev: habanalabs device structure.
 * @tpc_kernel: address that is programmed into the TPC kernel-base,
 *              icache-base and LUT-base registers.
 * @tpc_id: index of the TPC to use; selects the register block.
 *
 * This function should be used only during initialization and/or after reset,
 * when there are no active users.
 *
 * Return: 0 on success, -EIO if any of the three polling stages times out.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
{
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	/* distance between the register blocks of two consecutive TPCs */
	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	/* a dedicated polling timeout is used when hdev->pldm is set */
	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	/* the same address serves as kernel base, icache base and LUT base */
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	/* point the TPC sync object address at the first E_N sync object */
	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	/* issue an icache invalidate together with a 64KB icache prefetch */
	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* give the command a moment to take effect before polling */
	usleep_range(1000, 1500);

	/*
	 * wait until the vector pipe is reported empty; a timeout here is
	 * reported as an icache prefetch failure
	 */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		return -EIO;
	}

	/* trigger the kernel execution */
	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		return -EIO;
	}

	/* finally, wait for the WQ in-flight counter to drop to zero */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}
8302 
8303 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8304 		struct hl_ctx *ctx)
8305 {
8306 	struct gaudi_device *gaudi = hdev->asic_specific;
8307 	int min_alloc_order, rc, collective_cb_size;
8308 
8309 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8310 		return 0;
8311 
8312 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8313 							HOST_SPACE_INTERNAL_CB_SZ,
8314 							&hdev->internal_cb_pool_dma_addr,
8315 							GFP_KERNEL | __GFP_ZERO);
8316 
8317 	if (!hdev->internal_cb_pool_virt_addr)
8318 		return -ENOMEM;
8319 
8320 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8321 			sizeof(struct packet_fence);
8322 	min_alloc_order = ilog2(collective_cb_size);
8323 
8324 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8325 	if (!hdev->internal_cb_pool) {
8326 		dev_err(hdev->dev,
8327 			"Failed to create internal CB pool\n");
8328 		rc = -ENOMEM;
8329 		goto free_internal_cb_pool;
8330 	}
8331 
8332 	rc = gen_pool_add(hdev->internal_cb_pool,
8333 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8334 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8335 	if (rc) {
8336 		dev_err(hdev->dev,
8337 			"Failed to add memory to internal CB pool\n");
8338 		rc = -EFAULT;
8339 		goto destroy_internal_cb_pool;
8340 	}
8341 
8342 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8343 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8344 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8345 
8346 	if (!hdev->internal_cb_va_base) {
8347 		rc = -ENOMEM;
8348 		goto destroy_internal_cb_pool;
8349 	}
8350 
8351 	mutex_lock(&hdev->mmu_lock);
8352 
8353 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8354 			hdev->internal_cb_pool_dma_addr,
8355 			HOST_SPACE_INTERNAL_CB_SZ);
8356 	if (rc)
8357 		goto unreserve_internal_cb_pool;
8358 
8359 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8360 	if (rc)
8361 		goto unmap_internal_cb_pool;
8362 
8363 	mutex_unlock(&hdev->mmu_lock);
8364 
8365 	return 0;
8366 
8367 unmap_internal_cb_pool:
8368 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8369 			HOST_SPACE_INTERNAL_CB_SZ);
8370 unreserve_internal_cb_pool:
8371 	mutex_unlock(&hdev->mmu_lock);
8372 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8373 			HOST_SPACE_INTERNAL_CB_SZ);
8374 destroy_internal_cb_pool:
8375 	gen_pool_destroy(hdev->internal_cb_pool);
8376 free_internal_cb_pool:
8377 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8378 					hdev->internal_cb_pool_dma_addr);
8379 
8380 	return rc;
8381 }
8382 
8383 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8384 		struct hl_ctx *ctx)
8385 {
8386 	struct gaudi_device *gaudi = hdev->asic_specific;
8387 
8388 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8389 		return;
8390 
8391 	mutex_lock(&hdev->mmu_lock);
8392 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8393 			HOST_SPACE_INTERNAL_CB_SZ);
8394 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8395 			HOST_SPACE_INTERNAL_CB_SZ);
8396 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8397 	mutex_unlock(&hdev->mmu_lock);
8398 
8399 	gen_pool_destroy(hdev->internal_cb_pool);
8400 
8401 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8402 					hdev->internal_cb_pool_dma_addr);
8403 }
8404 
8405 static int gaudi_ctx_init(struct hl_ctx *ctx)
8406 {
8407 	int rc;
8408 
8409 	if (ctx->asid == HL_KERNEL_ASID_ID)
8410 		return 0;
8411 
8412 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8413 	if (rc)
8414 		return rc;
8415 
8416 	rc = gaudi_restore_user_registers(ctx->hdev);
8417 	if (rc)
8418 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8419 
8420 	return rc;
8421 }
8422 
8423 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8424 {
8425 	if (ctx->asid == HL_KERNEL_ASID_ID)
8426 		return;
8427 
8428 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8429 }
8430 
/* No-op on Gaudi: @cs is not examined and 0 is always returned. */
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}
8435 
8436 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8437 {
8438 	return gaudi_cq_assignment[cq_idx];
8439 }
8440 
8441 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8442 {
8443 	return sizeof(struct packet_msg_short) +
8444 			sizeof(struct packet_msg_prot) * 2;
8445 }
8446 
8447 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8448 {
8449 	return sizeof(struct packet_msg_short) * 4 +
8450 			sizeof(struct packet_fence) +
8451 			sizeof(struct packet_msg_prot) * 2;
8452 }
8453 
8454 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8455 {
8456 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8457 }
8458 
8459 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8460 				u32 size, bool eb)
8461 {
8462 	struct hl_cb *cb = (struct hl_cb *) data;
8463 	struct packet_msg_short *pkt;
8464 	u32 value, ctl, pkt_size = sizeof(*pkt);
8465 
8466 	pkt = cb->kernel_address + size;
8467 	memset(pkt, 0, pkt_size);
8468 
8469 	/* Inc by 1, Mode ADD */
8470 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8471 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8472 
8473 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8474 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8475 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8476 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8477 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8478 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8479 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8480 
8481 	pkt->value = cpu_to_le32(value);
8482 	pkt->ctl = cpu_to_le32(ctl);
8483 
8484 	return size + pkt_size;
8485 }
8486 
8487 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8488 					u16 addr)
8489 {
8490 	u32 ctl, pkt_size = sizeof(*pkt);
8491 
8492 	memset(pkt, 0, pkt_size);
8493 
8494 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8495 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8496 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8497 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8498 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8499 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8500 
8501 	pkt->value = cpu_to_le32(value);
8502 	pkt->ctl = cpu_to_le32(ctl);
8503 
8504 	return pkt_size;
8505 }
8506 
8507 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8508 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8509 		u16 sob_val, u16 mon_id)
8510 {
8511 	u64 monitor_base;
8512 	u32 ctl, value, pkt_size = sizeof(*pkt);
8513 	u16 msg_addr_offset;
8514 	u8 mask;
8515 
8516 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8517 		dev_err(hdev->dev,
8518 			"sob_base %u (mask %#x) is not valid\n",
8519 			sob_base, sob_mask);
8520 		return 0;
8521 	}
8522 
8523 	/*
8524 	 * monitor_base should be the content of the base0 address registers,
8525 	 * so it will be added to the msg short offsets
8526 	 */
8527 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8528 
8529 	msg_addr_offset =
8530 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8531 				monitor_base;
8532 
8533 	memset(pkt, 0, pkt_size);
8534 
8535 	/* Monitor config packet: bind the monitor to a sync object */
8536 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8537 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8538 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8539 			0); /* GREATER OR EQUAL*/
8540 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8541 
8542 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8543 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8544 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8545 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8546 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8547 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8548 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8549 
8550 	pkt->value = cpu_to_le32(value);
8551 	pkt->ctl = cpu_to_le32(ctl);
8552 
8553 	return pkt_size;
8554 }
8555 
8556 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8557 {
8558 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8559 
8560 	memset(pkt, 0, pkt_size);
8561 
8562 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8563 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8564 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8565 
8566 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8567 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8568 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8569 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8570 
8571 	pkt->cfg = cpu_to_le32(cfg);
8572 	pkt->ctl = cpu_to_le32(ctl);
8573 
8574 	return pkt_size;
8575 }
8576 
8577 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8578 {
8579 	u32 offset, nic_index;
8580 
8581 	switch (queue_id) {
8582 	case GAUDI_QUEUE_ID_DMA_0_0:
8583 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8584 		break;
8585 	case GAUDI_QUEUE_ID_DMA_0_1:
8586 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8587 		break;
8588 	case GAUDI_QUEUE_ID_DMA_0_2:
8589 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8590 		break;
8591 	case GAUDI_QUEUE_ID_DMA_0_3:
8592 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8593 		break;
8594 	case GAUDI_QUEUE_ID_DMA_1_0:
8595 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8596 		break;
8597 	case GAUDI_QUEUE_ID_DMA_1_1:
8598 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8599 		break;
8600 	case GAUDI_QUEUE_ID_DMA_1_2:
8601 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8602 		break;
8603 	case GAUDI_QUEUE_ID_DMA_1_3:
8604 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8605 		break;
8606 	case GAUDI_QUEUE_ID_DMA_5_0:
8607 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8608 		break;
8609 	case GAUDI_QUEUE_ID_DMA_5_1:
8610 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8611 		break;
8612 	case GAUDI_QUEUE_ID_DMA_5_2:
8613 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8614 		break;
8615 	case GAUDI_QUEUE_ID_DMA_5_3:
8616 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8617 		break;
8618 	case GAUDI_QUEUE_ID_TPC_7_0:
8619 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8620 		break;
8621 	case GAUDI_QUEUE_ID_TPC_7_1:
8622 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8623 		break;
8624 	case GAUDI_QUEUE_ID_TPC_7_2:
8625 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8626 		break;
8627 	case GAUDI_QUEUE_ID_TPC_7_3:
8628 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8629 		break;
8630 	case GAUDI_QUEUE_ID_NIC_0_0:
8631 	case GAUDI_QUEUE_ID_NIC_1_0:
8632 	case GAUDI_QUEUE_ID_NIC_2_0:
8633 	case GAUDI_QUEUE_ID_NIC_3_0:
8634 	case GAUDI_QUEUE_ID_NIC_4_0:
8635 	case GAUDI_QUEUE_ID_NIC_5_0:
8636 	case GAUDI_QUEUE_ID_NIC_6_0:
8637 	case GAUDI_QUEUE_ID_NIC_7_0:
8638 	case GAUDI_QUEUE_ID_NIC_8_0:
8639 	case GAUDI_QUEUE_ID_NIC_9_0:
8640 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8641 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8642 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8643 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8644 		break;
8645 	case GAUDI_QUEUE_ID_NIC_0_1:
8646 	case GAUDI_QUEUE_ID_NIC_1_1:
8647 	case GAUDI_QUEUE_ID_NIC_2_1:
8648 	case GAUDI_QUEUE_ID_NIC_3_1:
8649 	case GAUDI_QUEUE_ID_NIC_4_1:
8650 	case GAUDI_QUEUE_ID_NIC_5_1:
8651 	case GAUDI_QUEUE_ID_NIC_6_1:
8652 	case GAUDI_QUEUE_ID_NIC_7_1:
8653 	case GAUDI_QUEUE_ID_NIC_8_1:
8654 	case GAUDI_QUEUE_ID_NIC_9_1:
8655 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8656 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8657 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8658 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8659 		break;
8660 	case GAUDI_QUEUE_ID_NIC_0_2:
8661 	case GAUDI_QUEUE_ID_NIC_1_2:
8662 	case GAUDI_QUEUE_ID_NIC_2_2:
8663 	case GAUDI_QUEUE_ID_NIC_3_2:
8664 	case GAUDI_QUEUE_ID_NIC_4_2:
8665 	case GAUDI_QUEUE_ID_NIC_5_2:
8666 	case GAUDI_QUEUE_ID_NIC_6_2:
8667 	case GAUDI_QUEUE_ID_NIC_7_2:
8668 	case GAUDI_QUEUE_ID_NIC_8_2:
8669 	case GAUDI_QUEUE_ID_NIC_9_2:
8670 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8671 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8672 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8673 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8674 		break;
8675 	case GAUDI_QUEUE_ID_NIC_0_3:
8676 	case GAUDI_QUEUE_ID_NIC_1_3:
8677 	case GAUDI_QUEUE_ID_NIC_2_3:
8678 	case GAUDI_QUEUE_ID_NIC_3_3:
8679 	case GAUDI_QUEUE_ID_NIC_4_3:
8680 	case GAUDI_QUEUE_ID_NIC_5_3:
8681 	case GAUDI_QUEUE_ID_NIC_6_3:
8682 	case GAUDI_QUEUE_ID_NIC_7_3:
8683 	case GAUDI_QUEUE_ID_NIC_8_3:
8684 	case GAUDI_QUEUE_ID_NIC_9_3:
8685 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8686 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8687 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8688 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8689 		break;
8690 	default:
8691 		return -EINVAL;
8692 	}
8693 
8694 	*addr = CFG_BASE + offset;
8695 
8696 	return 0;
8697 }
8698 
8699 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8700 {
8701 	u64 monitor_base;
8702 	u32 size = 0;
8703 	u16 msg_addr_offset;
8704 
8705 	/*
8706 	 * monitor_base should be the content of the base0 address registers,
8707 	 * so it will be added to the msg short offsets
8708 	 */
8709 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8710 
8711 	/* First monitor config packet: low address of the sync */
8712 	msg_addr_offset =
8713 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8714 				monitor_base;
8715 
8716 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8717 					msg_addr_offset);
8718 
8719 	/* Second monitor config packet: high address of the sync */
8720 	msg_addr_offset =
8721 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8722 				monitor_base;
8723 
8724 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8725 					msg_addr_offset);
8726 
8727 	/*
8728 	 * Third monitor config packet: the payload, i.e. what to write when the
8729 	 * sync triggers
8730 	 */
8731 	msg_addr_offset =
8732 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8733 				monitor_base;
8734 
8735 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8736 
8737 	return size;
8738 }
8739 
8740 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8741 				struct hl_gen_wait_properties *prop)
8742 {
8743 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8744 	void *buf = cb->kernel_address;
8745 	u64 fence_addr = 0;
8746 	u32 size = prop->size;
8747 
8748 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8749 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8750 				prop->q_idx);
8751 		return 0;
8752 	}
8753 
8754 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8755 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8756 			prop->sob_mask, prop->sob_val, prop->mon_id);
8757 	size += gaudi_add_fence_pkt(buf + size);
8758 
8759 	return size;
8760 }
8761 
8762 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8763 {
8764 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8765 
8766 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8767 		hw_sob->sob_id);
8768 
8769 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8770 			hw_sob->sob_id * 4, 0);
8771 
8772 	kref_init(&hw_sob->kref);
8773 }
8774 
8775 static u64 gaudi_get_device_time(struct hl_device *hdev)
8776 {
8777 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8778 
8779 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8780 }
8781 
8782 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8783 				u32 *block_size, u32 *block_id)
8784 {
8785 	return -EPERM;
8786 }
8787 
8788 static int gaudi_block_mmap(struct hl_device *hdev,
8789 				struct vm_area_struct *vma,
8790 				u32 block_id, u32 block_size)
8791 {
8792 	return -EPERM;
8793 }
8794 
8795 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8796 {
8797 	struct cpu_dyn_regs *dyn_regs =
8798 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8799 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8800 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8801 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8802 
8803 	WREG32(irq_handler_offset,
8804 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8805 }
8806 
8807 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8808 {
8809 	return -EINVAL;
8810 }
8811 
8812 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8813 {
8814 	switch (pll_idx) {
8815 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8816 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8817 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8818 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8819 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8820 	case HL_GAUDI_MME_PLL: return MME_PLL;
8821 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8822 	case HL_GAUDI_IF_PLL: return IF_PLL;
8823 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8824 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8825 	default: return -EINVAL;
8826 	}
8827 }
8828 
8829 static int gaudi_add_sync_to_engine_map_entry(
8830 	struct hl_sync_to_engine_map *map, u32 reg_value,
8831 	enum hl_sync_engine_type engine_type, u32 engine_id)
8832 {
8833 	struct hl_sync_to_engine_map_entry *entry;
8834 
8835 	/* Reg value represents a partial address of sync object,
8836 	 * it is used as unique identifier. For this we need to
8837 	 * clear the cutoff cfg base bits from the value.
8838 	 */
8839 	if (reg_value == 0 || reg_value == 0xffffffff)
8840 		return 0;
8841 	reg_value -= lower_32_bits(CFG_BASE);
8842 
8843 	/* create a new hash entry */
8844 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8845 	if (!entry)
8846 		return -ENOMEM;
8847 	entry->engine_type = engine_type;
8848 	entry->engine_id = engine_id;
8849 	entry->sync_id = reg_value;
8850 	hash_add(map->tb, &entry->node, reg_value);
8851 
8852 	return 0;
8853 }
8854 
8855 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8856 				struct hl_sync_to_engine_map *map)
8857 {
8858 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8859 	int i, j, rc;
8860 	u32 reg_value;
8861 
8862 	/* Iterate over TPC engines */
8863 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8864 
8865 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8866 					sds->props[SP_NEXT_TPC] * i);
8867 
8868 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8869 							ENGINE_TPC, i);
8870 		if (rc)
8871 			goto free_sync_to_engine_map;
8872 	}
8873 
8874 	/* Iterate over MME engines */
8875 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8876 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8877 
8878 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8879 						sds->props[SP_NEXT_MME] * i +
8880 						j * sizeof(u32));
8881 
8882 			rc = gaudi_add_sync_to_engine_map_entry(
8883 				map, reg_value, ENGINE_MME,
8884 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8885 			if (rc)
8886 				goto free_sync_to_engine_map;
8887 		}
8888 	}
8889 
8890 	/* Iterate over DMA engines */
8891 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8892 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8893 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8894 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8895 							ENGINE_DMA, i);
8896 		if (rc)
8897 			goto free_sync_to_engine_map;
8898 	}
8899 
8900 	return 0;
8901 
8902 free_sync_to_engine_map:
8903 	hl_state_dump_free_sync_to_engine_map(map);
8904 
8905 	return rc;
8906 }
8907 
8908 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8909 {
8910 	return FIELD_GET(
8911 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8912 		mon->status);
8913 }
8914 
8915 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8916 {
8917 	const size_t max_write = 10;
8918 	u32 gid, mask, sob;
8919 	int i, offset;
8920 
8921 	/* Sync object ID is calculated as follows:
8922 	 * (8 * group_id + cleared bits in mask)
8923 	 */
8924 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8925 			mon->arm_data);
8926 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8927 			mon->arm_data);
8928 
8929 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8930 		max_write; mask >>= 1, i++) {
8931 		if (!(mask & 1)) {
8932 			sob = gid * MONITOR_MAX_SOBS + i;
8933 
8934 			if (offset > 0)
8935 				offset += snprintf(sobs + offset, max_write,
8936 							", ");
8937 
8938 			offset += snprintf(sobs + offset, max_write, "%u", sob);
8939 		}
8940 	}
8941 }
8942 
8943 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8944 				struct hl_device *hdev,
8945 				struct hl_mon_state_dump *mon)
8946 {
8947 	const char *name;
8948 	char scratch_buf1[BIN_REG_STRING_SIZE],
8949 		scratch_buf2[BIN_REG_STRING_SIZE];
8950 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8951 
8952 	name = hl_state_dump_get_monitor_name(hdev, mon);
8953 	if (!name)
8954 		name = "";
8955 
8956 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8957 
8958 	return hl_snprintf_resize(
8959 		buf, size, offset,
8960 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8961 		mon->id, name,
8962 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8963 				mon->arm_data),
8964 		hl_format_as_binary(
8965 			scratch_buf1, sizeof(scratch_buf1),
8966 			FIELD_GET(
8967 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8968 				mon->arm_data)),
8969 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8970 				mon->arm_data),
8971 		mon->wr_data,
8972 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8973 		hl_format_as_binary(
8974 			scratch_buf2, sizeof(scratch_buf2),
8975 			FIELD_GET(
8976 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8977 				mon->status)),
8978 		monitored_sobs);
8979 }
8980 
8981 
8982 static int gaudi_print_fences_single_engine(
8983 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8984 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8985 	size_t *size, size_t *offset)
8986 {
8987 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8988 	int rc = -ENOMEM, i;
8989 	u32 *statuses, *fences;
8990 
8991 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8992 			sizeof(*statuses), GFP_KERNEL);
8993 	if (!statuses)
8994 		goto out;
8995 
8996 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8997 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
8998 			 sizeof(*fences), GFP_KERNEL);
8999 	if (!fences)
9000 		goto free_status;
9001 
9002 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9003 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9004 
9005 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9006 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9007 		fences[i] = RREG32(base_offset + i * sizeof(u32));
9008 
9009 	/* The actual print */
9010 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9011 		u32 fence_id;
9012 		u64 fence_cnt, fence_rdata;
9013 		const char *engine_name;
9014 
9015 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9016 			statuses[i]))
9017 			continue;
9018 
9019 		fence_id =
9020 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9021 		fence_cnt = base_offset + CFG_BASE +
9022 			sizeof(u32) *
9023 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9024 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9025 				sds->props[SP_FENCE0_RDATA_OFFSET];
9026 		engine_name = hl_sync_engine_to_string(engine_type);
9027 
9028 		rc = hl_snprintf_resize(
9029 			buf, size, offset,
9030 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9031 			engine_name, engine_id,
9032 			i, fence_id,
9033 			fence_cnt, engine_name, engine_id, fence_id, i,
9034 			fence_rdata, engine_name, engine_id, fence_id, i,
9035 			fences[fence_id],
9036 			statuses[i]);
9037 		if (rc)
9038 			goto free_fences;
9039 	}
9040 
9041 	rc = 0;
9042 
9043 free_fences:
9044 	kfree(fences);
9045 free_status:
9046 	kfree(statuses);
9047 out:
9048 	return rc;
9049 }
9050 
9051 
9052 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9053 	.monitor_valid = gaudi_monitor_valid,
9054 	.print_single_monitor = gaudi_print_single_monitor,
9055 	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9056 	.print_fences_single_engine = gaudi_print_fences_single_engine,
9057 };
9058 
9059 static void gaudi_state_dump_init(struct hl_device *hdev)
9060 {
9061 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9062 	int i;
9063 
9064 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9065 		hash_add(sds->so_id_to_str_tb,
9066 			&gaudi_so_id_to_str[i].node,
9067 			gaudi_so_id_to_str[i].id);
9068 
9069 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9070 		hash_add(sds->monitor_id_to_str_tb,
9071 			&gaudi_monitor_id_to_str[i].node,
9072 			gaudi_monitor_id_to_str[i].id);
9073 
9074 	sds->props = gaudi_state_dump_specs_props;
9075 
9076 	sds->sync_namager_names = gaudi_sync_manager_names;
9077 
9078 	sds->funcs = gaudi_state_dump_funcs;
9079 }
9080 
9081 static u32 *gaudi_get_stream_master_qid_arr(void)
9082 {
9083 	return gaudi_stream_master;
9084 }
9085 
/* No-op on Gaudi: @hdev is not examined and 0 is always returned. */
static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}
9090 
/* No-op on Gaudi: @hdev is not examined and 0 is always returned. */
static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}
9095 
/* Intentionally empty on Gaudi: nothing is checked and @hdev is not used. */
static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}
9099 
9100 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9101 {
9102 	struct hl_device *hdev = dev_get_drvdata(dev);
9103 	struct cpucp_info *cpucp_info;
9104 
9105 	cpucp_info = &hdev->asic_prop.cpucp_info;
9106 
9107 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9108 }
9109 
9110 static DEVICE_ATTR_RO(infineon_ver);
9111 
9112 static struct attribute *gaudi_vrm_dev_attrs[] = {
9113 	&dev_attr_infineon_ver.attr,
9114 	NULL,
9115 };
9116 
9117 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9118 					struct attribute_group *dev_vrm_attr_grp)
9119 {
9120 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9121 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9122 }
9123 
/**
 * gaudi_send_device_activity - stub callback, performs no work
 *
 * @hdev: pointer to hl_device structure (unused)
 * @open: unused
 *
 * Return: 0 unconditionally.
 */
static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
9128 
9129 static const struct hl_asic_funcs gaudi_funcs = {
9130 	.early_init = gaudi_early_init,
9131 	.early_fini = gaudi_early_fini,
9132 	.late_init = gaudi_late_init,
9133 	.late_fini = gaudi_late_fini,
9134 	.sw_init = gaudi_sw_init,
9135 	.sw_fini = gaudi_sw_fini,
9136 	.hw_init = gaudi_hw_init,
9137 	.hw_fini = gaudi_hw_fini,
9138 	.halt_engines = gaudi_halt_engines,
9139 	.suspend = gaudi_suspend,
9140 	.resume = gaudi_resume,
9141 	.mmap = gaudi_mmap,
9142 	.ring_doorbell = gaudi_ring_doorbell,
9143 	.pqe_write = gaudi_pqe_write,
9144 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9145 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9146 	.scrub_device_mem = gaudi_scrub_device_mem,
9147 	.scrub_device_dram = gaudi_scrub_device_dram,
9148 	.get_int_queue_base = gaudi_get_int_queue_base,
9149 	.test_queues = gaudi_test_queues,
9150 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9151 	.asic_dma_pool_free = gaudi_dma_pool_free,
9152 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9153 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9154 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9155 	.cs_parser = gaudi_cs_parser,
9156 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
9157 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9158 	.update_eq_ci = gaudi_update_eq_ci,
9159 	.context_switch = gaudi_context_switch,
9160 	.restore_phase_topology = gaudi_restore_phase_topology,
9161 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9162 	.add_device_attr = gaudi_add_device_attr,
9163 	.handle_eqe = gaudi_handle_eqe,
9164 	.get_events_stat = gaudi_get_events_stat,
9165 	.read_pte = gaudi_read_pte,
9166 	.write_pte = gaudi_write_pte,
9167 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9168 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9169 	.mmu_prefetch_cache_range = NULL,
9170 	.send_heartbeat = gaudi_send_heartbeat,
9171 	.debug_coresight = gaudi_debug_coresight,
9172 	.is_device_idle = gaudi_is_device_idle,
9173 	.compute_reset_late_init = gaudi_compute_reset_late_init,
9174 	.hw_queues_lock = gaudi_hw_queues_lock,
9175 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9176 	.get_pci_id = gaudi_get_pci_id,
9177 	.get_eeprom_data = gaudi_get_eeprom_data,
9178 	.get_monitor_dump = gaudi_get_monitor_dump,
9179 	.send_cpu_message = gaudi_send_cpu_message,
9180 	.pci_bars_map = gaudi_pci_bars_map,
9181 	.init_iatu = gaudi_init_iatu,
9182 	.rreg = hl_rreg,
9183 	.wreg = hl_wreg,
9184 	.halt_coresight = gaudi_halt_coresight,
9185 	.ctx_init = gaudi_ctx_init,
9186 	.ctx_fini = gaudi_ctx_fini,
9187 	.pre_schedule_cs = gaudi_pre_schedule_cs,
9188 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9189 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9190 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9191 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9192 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9193 	.gen_signal_cb = gaudi_gen_signal_cb,
9194 	.gen_wait_cb = gaudi_gen_wait_cb,
9195 	.reset_sob = gaudi_reset_sob,
9196 	.reset_sob_group = gaudi_reset_sob_group,
9197 	.get_device_time = gaudi_get_device_time,
9198 	.pb_print_security_errors = NULL,
9199 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9200 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9201 	.get_dec_base_addr = NULL,
9202 	.scramble_addr = hl_mmu_scramble_addr,
9203 	.descramble_addr = hl_mmu_descramble_addr,
9204 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9205 	.get_hw_block_id = gaudi_get_hw_block_id,
9206 	.hw_block_mmap = gaudi_block_mmap,
9207 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9208 	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9209 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9210 	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9211 	.init_firmware_loader = gaudi_init_firmware_loader,
9212 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9213 	.state_dump_init = gaudi_state_dump_init,
9214 	.get_sob_addr = gaudi_get_sob_addr,
9215 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9216 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9217 	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9218 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9219 	.access_dev_mem = hl_access_dev_mem,
9220 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9221 	.send_device_activity = gaudi_send_device_activity,
9222 	.set_dram_properties = gaudi_set_dram_properties,
9223 	.set_binning_masks = gaudi_set_binning_masks,
9224 };
9225 
9226 /**
9227  * gaudi_set_asic_funcs - set GAUDI function pointers
9228  *
9229  * @hdev: pointer to hl_device structure
9230  *
9231  */
9232 void gaudi_set_asic_funcs(struct hl_device *hdev)
9233 {
9234 	hdev->asic_funcs = &gaudi_funcs;
9235 }
9236