xref: /openbmc/linux/drivers/accel/habanalabs/gaudi/gaudi.c (revision e65e175b07bef5974045cc42238de99057669ca7)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse the CB, but WREG should be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to non-secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
67 
68 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
72 
73 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
82 
83 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
84 
85 #define GAUDI_MAX_STRING_LEN		20
86 
87 #define GAUDI_CB_POOL_CB_CNT		512
88 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
89 
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
91 
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
93 
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
95 
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
97 
98 #define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */
99 
100 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
101 
102 #define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
103 
104 #define MONITOR_SOB_STRING_SIZE		256
105 
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107 	GAUDI_QUEUE_ID_DMA_0_0,
108 	GAUDI_QUEUE_ID_DMA_0_1,
109 	GAUDI_QUEUE_ID_DMA_0_2,
110 	GAUDI_QUEUE_ID_DMA_0_3,
111 	GAUDI_QUEUE_ID_DMA_1_0,
112 	GAUDI_QUEUE_ID_DMA_1_1,
113 	GAUDI_QUEUE_ID_DMA_1_2,
114 	GAUDI_QUEUE_ID_DMA_1_3
115 };
116 
117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
118 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
119 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
120 		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
121 		"gaudi cpu eq"
122 };
123 
124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
125 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
126 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
127 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
128 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
129 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
130 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
131 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
132 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
133 };
134 
135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
136 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
137 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
138 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
139 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
140 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
141 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
142 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
143 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
144 };
145 
146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
147 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
148 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
149 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
150 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
151 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
152 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
153 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
154 	[PACKET_FENCE]		= sizeof(struct packet_fence),
155 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
156 	[PACKET_NOP]		= sizeof(struct packet_nop),
157 	[PACKET_STOP]		= sizeof(struct packet_stop),
158 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
159 	[PACKET_WAIT]		= sizeof(struct packet_wait),
160 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
161 };
162 
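/* Check that a packet id parsed out of a user CB is a known Gaudi packet type */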
163 static inline bool validate_packet_id(enum packet_id id)
164 {
165 	switch (id) {
166 	case PACKET_WREG_32:
167 	case PACKET_WREG_BULK:
168 	case PACKET_MSG_LONG:
169 	case PACKET_MSG_SHORT:
170 	case PACKET_CP_DMA:
171 	case PACKET_REPEAT:
172 	case PACKET_MSG_PROT:
173 	case PACKET_FENCE:
174 	case PACKET_LIN_DMA:
175 	case PACKET_NOP:
176 	case PACKET_STOP:
177 	case PACKET_ARB_POINT:
178 	case PACKET_WAIT:
179 	case PACKET_LOAD_AND_EXE:
180 		return true;
181 	default:
182 		return false;
183 	}
184 }
185 
186 static const char * const
187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
188 	"tpc_address_exceed_slm",
189 	"tpc_div_by_0",
190 	"tpc_spu_mac_overflow",
191 	"tpc_spu_addsub_overflow",
192 	"tpc_spu_abs_overflow",
193 	"tpc_spu_fp_dst_nan_inf",
194 	"tpc_spu_fp_dst_denorm",
195 	"tpc_vpu_mac_overflow",
196 	"tpc_vpu_addsub_overflow",
197 	"tpc_vpu_abs_overflow",
198 	"tpc_vpu_fp_dst_nan_inf",
199 	"tpc_vpu_fp_dst_denorm",
200 	"tpc_assertions",
201 	"tpc_illegal_instruction",
202 	"tpc_pc_wrap_around",
203 	"tpc_qm_sw_err",
204 	"tpc_hbw_rresp_err",
205 	"tpc_hbw_bresp_err",
206 	"tpc_lbw_rresp_err",
207 	"tpc_lbw_bresp_err"
208 };
209 
210 static const char * const
211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
212 	"PQ AXI HBW error",
213 	"CQ AXI HBW error",
214 	"CP AXI HBW error",
215 	"CP error due to undefined OPCODE",
216 	"CP encountered STOP OPCODE",
217 	"CP AXI LBW error",
218 	"CP WRREG32 or WRBULK returned error",
219 	"N/A",
220 	"FENCE 0 inc over max value and clipped",
221 	"FENCE 1 inc over max value and clipped",
222 	"FENCE 2 inc over max value and clipped",
223 	"FENCE 3 inc over max value and clipped",
224 	"FENCE 0 dec under min value and clipped",
225 	"FENCE 1 dec under min value and clipped",
226 	"FENCE 2 dec under min value and clipped",
227 	"FENCE 3 dec under min value and clipped"
228 };
229 
230 static const char * const
231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
232 	"Choice push while full error",
233 	"Choice Q watchdog error",
234 	"MSG AXI LBW returned with error"
235 };
236 
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352 
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354 	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355 	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356 	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357 	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358 	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359 	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360 	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361 	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362 	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363 	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364 	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365 	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366 	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367 	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368 	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369 	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370 	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371 	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372 	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373 	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374 	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375 	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376 	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377 	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378 	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379 	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380 	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382 
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384 	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385 	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
386 	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387 	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388 	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389 	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390 	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391 	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392 	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393 	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394 	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396 
397 static s64 gaudi_state_dump_specs_props[] = {
398 	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399 	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400 	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401 	[SP_MON_OBJ_WR_ADDR_LOW] =
402 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403 	[SP_MON_OBJ_WR_ADDR_HIGH] =
404 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405 	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406 	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407 	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408 	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409 	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410 	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411 	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412 	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413 	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414 	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415 	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416 	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417 	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418 	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419 	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420 	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421 	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422 	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423 	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424 	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425 	[SP_FENCE0_CNT_OFFSET] =
426 		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427 	[SP_FENCE0_RDATA_OFFSET] =
428 		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429 	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430 	[SP_NUM_CORES] = 1,
431 };
432 
433 static const int gaudi_queue_id_to_engine_id[] = {
434 	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
435 	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
436 	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
437 	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
438 	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
439 	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
440 	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
441 	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
442 	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
443 	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
444 	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
445 	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
446 	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
447 	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
448 	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
449 	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
450 	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
451 	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
452 	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
453 	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
454 	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
455 	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
456 	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
457 	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
458 	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
459 	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
460 	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
461 	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
462 	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
463 };
464 
465 /* The order here is opposite to the order of the indexing in the h/w.
466  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
467  */
468 static const char * const gaudi_sync_manager_names[] = {
469 	"SYNC_MGR_E_N",
470 	"SYNC_MGR_W_N",
471 	"SYNC_MGR_E_S",
472 	"SYNC_MGR_W_S",
473 	NULL
474 };
475 
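/* Parameters for ECC error information extraction: the address of the
 * reporting register block, the number of memories in that block, and
 * whether the error was a double (uncorrectable) error.
 */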
476 struct ecc_info_extract_params {
477 	u64 block_address;
478 	u32 num_memories;
479 	bool derr;
480 };
481 
482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
483 								u64 phys_addr);
484 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
485 					struct hl_cs_job *job);
486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
487 					u32 size, u64 val);
488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
489 					u32 num_regs, u32 val);
490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
491 				u32 tpc_id);
492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
493 static int gaudi_cpucp_info_get(struct hl_device *hdev);
494 static void gaudi_disable_clock_gating(struct hl_device *hdev);
495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
497 				u32 size, bool eb);
498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
499 				struct hl_gen_wait_properties *prop);
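
/* Queue-to-collective-mode mapping: the external (PCI DMA) queues act as
 * collective masters, while the DMA5, TPC7 and NIC queues can serve as
 * collective slaves. All other queues don't take part in collective waits.
 */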
500 static inline enum hl_collective_mode
501 get_collective_mode(struct hl_device *hdev, u32 queue_id)
502 {
503 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
504 		return HL_COLLECTIVE_MASTER;
505 
506 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
507 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
508 		return HL_COLLECTIVE_SLAVE;
509 
510 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
511 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
512 		return HL_COLLECTIVE_SLAVE;
513 
514 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
515 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
516 		return HL_COLLECTIVE_SLAVE;
517 
518 	return HL_COLLECTIVE_NOT_SUPPORTED;
519 }
520 
521 static inline void set_default_power_values(struct hl_device *hdev)
522 {
523 	struct asic_fixed_properties *prop = &hdev->asic_prop;
524 
525 	if (hdev->card_type == cpucp_card_type_pmc) {
526 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
527 
528 		if (prop->fw_security_enabled)
529 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
530 		else
531 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
532 	} else {
533 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
534 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
535 	}
536 }
537 
538 static int gaudi_set_fixed_properties(struct hl_device *hdev)
539 {
540 	struct asic_fixed_properties *prop = &hdev->asic_prop;
541 	u32 num_sync_stream_queues = 0;
542 	int i;
543 
544 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
545 	prop->hw_queues_props = kcalloc(prop->max_queues,
546 			sizeof(struct hw_queue_properties),
547 			GFP_KERNEL);
548 
549 	if (!prop->hw_queues_props)
550 		return -ENOMEM;
551 
552 	for (i = 0 ; i < prop->max_queues ; i++) {
553 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
554 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
555 			prop->hw_queues_props[i].driver_only = 0;
556 			prop->hw_queues_props[i].supports_sync_stream = 1;
557 			prop->hw_queues_props[i].cb_alloc_flags =
558 				CB_ALLOC_KERNEL;
559 			num_sync_stream_queues++;
560 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
561 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
562 			prop->hw_queues_props[i].driver_only = 1;
563 			prop->hw_queues_props[i].supports_sync_stream = 0;
564 			prop->hw_queues_props[i].cb_alloc_flags =
565 				CB_ALLOC_KERNEL;
566 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
567 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
568 			prop->hw_queues_props[i].driver_only = 0;
569 			prop->hw_queues_props[i].supports_sync_stream = 0;
570 			prop->hw_queues_props[i].cb_alloc_flags =
571 				CB_ALLOC_USER;
572 
573 		}
574 		prop->hw_queues_props[i].collective_mode =
575 						get_collective_mode(hdev, i);
576 	}
577 
578 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
579 	prop->cfg_base_address = CFG_BASE;
580 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
581 	prop->host_base_address = HOST_PHYS_BASE;
582 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
583 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
584 	prop->completion_mode = HL_COMPLETION_MODE_JOB;
585 	prop->collective_first_sob = 0;
586 	prop->collective_first_mon = 0;
587 
588 	/* 2 SOBs per internal queue stream are reserved for collective */
589 	prop->sync_stream_first_sob =
590 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
591 			* QMAN_STREAMS * HL_RSVD_SOBS;
592 
593 	/* 1 monitor per internal queue stream is reserved for collective
594 	 * 2 monitors per external queue stream are reserved for collective
595 	 */
596 	prop->sync_stream_first_mon =
597 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
598 			(NUMBER_OF_EXT_HW_QUEUES * 2);
599 
600 	prop->dram_base_address = DRAM_PHYS_BASE;
601 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
602 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
603 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
604 
605 	prop->sram_base_address = SRAM_BASE_ADDR;
606 	prop->sram_size = SRAM_SIZE;
607 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
608 	prop->sram_user_base_address =
609 			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
610 
611 	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
612 	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
613 
614 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
615 	if (hdev->pldm)
616 		prop->mmu_pgt_size = 0x800000; /* 8MB */
617 	else
618 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
619 	prop->mmu_pte_size = HL_PTE_SIZE;
620 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
621 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
622 	prop->dram_page_size = PAGE_SIZE_2MB;
623 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
624 	prop->dram_supports_virtual_memory = false;
625 
626 	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
627 	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
628 	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
629 	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
630 	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
631 	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
632 	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
633 	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
634 	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
635 	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
636 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
637 	prop->pmmu.end_addr =
638 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
639 	prop->pmmu.page_size = PAGE_SIZE_4KB;
640 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
641 	prop->pmmu.last_mask = LAST_MASK;
642 	/* TODO: will be duplicated until implementing per-MMU props */
643 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
644 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
645 
646 	/* PMMU and HPMMU are the same except for the page size */
647 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
648 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
649 
650 	/* shifts and masks are the same in PMMU and DMMU */
651 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
652 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
653 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
654 	prop->dmmu.page_size = PAGE_SIZE_2MB;
655 
656 	prop->cfg_size = CFG_SIZE;
657 	prop->max_asid = MAX_ASID;
658 	prop->num_of_events = GAUDI_EVENT_SIZE;
659 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
660 
661 	set_default_power_values(hdev);
662 
663 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
664 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
665 
666 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
667 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
668 
669 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
670 					CARD_NAME_MAX_LEN);
671 
672 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
673 
674 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
675 			prop->sync_stream_first_sob +
676 			(num_sync_stream_queues * HL_RSVD_SOBS);
677 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
678 			prop->sync_stream_first_mon +
679 			(num_sync_stream_queues * HL_RSVD_MONS);
680 
681 	prop->first_available_user_interrupt = USHRT_MAX;
682 
683 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
684 		prop->first_available_cq[i] = USHRT_MAX;
685 
686 	prop->fw_cpu_boot_dev_sts0_valid = false;
687 	prop->fw_cpu_boot_dev_sts1_valid = false;
688 	prop->hard_reset_done_by_fw = false;
689 	prop->gic_interrupts_enable = true;
690 
691 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
692 
693 	prop->clk_pll_index = HL_GAUDI_MME_PLL;
694 	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
695 
696 	prop->use_get_power_for_reset_history = true;
697 
698 	prop->configurable_stop_on_err = true;
699 
700 	prop->set_max_power_on_device_init = true;
701 
702 	prop->dma_mask = 48;
703 
704 	return 0;
705 }
706 
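/* Map the SRAM, CFG and HBM PCI BARs (HBM is mapped write-combined) and set
 * hdev->rmmio to the CFG space, which starts CFG_BASE - SPI_FLASH_BASE_ADDR
 * bytes into the CFG BAR.
 */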
707 static int gaudi_pci_bars_map(struct hl_device *hdev)
708 {
709 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
710 	bool is_wc[3] = {false, false, true};
711 	int rc;
712 
713 	rc = hl_pci_bars_map(hdev, name, is_wc);
714 	if (rc)
715 		return rc;
716 
717 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
718 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
719 
720 	return 0;
721 }
722 
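/* Move inbound PCI region 2 (the HBM BAR) to point at @addr. Returns the
 * previous BAR base address on success, or U64_MAX if the iATU is owned by
 * the firmware or re-programming the region failed.
 */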
723 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
724 {
725 	struct gaudi_device *gaudi = hdev->asic_specific;
726 	struct hl_inbound_pci_region pci_region;
727 	u64 old_addr = addr;
728 	int rc;
729 
730 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
731 		return old_addr;
732 
733 	if (hdev->asic_prop.iatu_done_by_fw)
734 		return U64_MAX;
735 
736 	/* Inbound Region 2 - Bar 4 - Point to HBM */
737 	pci_region.mode = PCI_BAR_MATCH_MODE;
738 	pci_region.bar = HBM_BAR_ID;
739 	pci_region.addr = addr;
740 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
741 	if (rc)
742 		return U64_MAX;
743 
744 	if (gaudi) {
745 		old_addr = gaudi->hbm_bar_cur_addr;
746 		gaudi->hbm_bar_cur_addr = addr;
747 	}
748 
749 	return old_addr;
750 }
751 
752 static int gaudi_init_iatu(struct hl_device *hdev)
753 {
754 	struct hl_inbound_pci_region inbound_region;
755 	struct hl_outbound_pci_region outbound_region;
756 	int rc;
757 
758 	if (hdev->asic_prop.iatu_done_by_fw)
759 		return 0;
760 
761 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
762 	inbound_region.mode = PCI_BAR_MATCH_MODE;
763 	inbound_region.bar = SRAM_BAR_ID;
764 	inbound_region.addr = SRAM_BASE_ADDR;
765 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
766 	if (rc)
767 		goto done;
768 
769 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
770 	inbound_region.mode = PCI_BAR_MATCH_MODE;
771 	inbound_region.bar = CFG_BAR_ID;
772 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
773 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
774 	if (rc)
775 		goto done;
776 
777 	/* Inbound Region 2 - Bar 4 - Point to HBM */
778 	inbound_region.mode = PCI_BAR_MATCH_MODE;
779 	inbound_region.bar = HBM_BAR_ID;
780 	inbound_region.addr = DRAM_PHYS_BASE;
781 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
782 	if (rc)
783 		goto done;
784 
785 	/* Outbound Region 0 - Point to Host */
786 	outbound_region.addr = HOST_PHYS_BASE;
787 	outbound_region.size = HOST_PHYS_SIZE;
788 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
789 
790 done:
791 	return rc;
792 }
793 
794 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
795 {
796 	return RREG32(mmHW_STATE);
797 }
798 
799 static int gaudi_early_init(struct hl_device *hdev)
800 {
801 	struct asic_fixed_properties *prop = &hdev->asic_prop;
802 	struct pci_dev *pdev = hdev->pdev;
803 	resource_size_t pci_bar_size;
804 	u32 fw_boot_status;
805 	int rc;
806 
807 	rc = gaudi_set_fixed_properties(hdev);
808 	if (rc) {
809 		dev_err(hdev->dev, "Failed setting fixed properties\n");
810 		return rc;
811 	}
812 
813 	/* Check BAR sizes */
814 	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
815 
816 	if (pci_bar_size != SRAM_BAR_SIZE) {
817 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
818 			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
819 		rc = -ENODEV;
820 		goto free_queue_props;
821 	}
822 
823 	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
824 
825 	if (pci_bar_size != CFG_BAR_SIZE) {
826 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
827 			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
828 		rc = -ENODEV;
829 		goto free_queue_props;
830 	}
831 
832 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
833 	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
834 
835 	/* If FW security is enabled at this point it means no access to ELBI */
836 	if (hdev->asic_prop.fw_security_enabled) {
837 		hdev->asic_prop.iatu_done_by_fw = true;
838 
839 		/*
840 		 * GIC-security-bit can ONLY be set by CPUCP, so at this stage
841 		 * the decision can only be taken based on PCI ID security.
842 		 */
843 		hdev->asic_prop.gic_interrupts_enable = false;
844 		goto pci_init;
845 	}
846 
847 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
848 				&fw_boot_status);
849 	if (rc)
850 		goto free_queue_props;
851 
852 	/* Check whether FW is configuring iATU */
853 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
854 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
855 		hdev->asic_prop.iatu_done_by_fw = true;
856 
857 pci_init:
858 	rc = hl_pci_init(hdev);
859 	if (rc)
860 		goto free_queue_props;
861 
862 	/* Before continuing with the initialization, we need to read the preboot
863 	 * version to determine whether we are running with security-enabled firmware
864 	 */
865 	rc = hl_fw_read_preboot_status(hdev);
866 	if (rc) {
867 		if (hdev->reset_on_preboot_fail)
868 			hdev->asic_funcs->hw_fini(hdev, true, false);
869 		goto pci_fini;
870 	}
871 
872 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
873 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
874 		hdev->asic_funcs->hw_fini(hdev, true, false);
875 	}
876 
877 	return 0;
878 
879 pci_fini:
880 	hl_pci_fini(hdev);
881 free_queue_props:
882 	kfree(hdev->asic_prop.hw_queues_props);
883 	return rc;
884 }
885 
886 static int gaudi_early_fini(struct hl_device *hdev)
887 {
888 	kfree(hdev->asic_prop.hw_queues_props);
889 	hl_pci_fini(hdev);
890 
891 	return 0;
892 }
893 
894 /**
895  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
896  *
897  * @hdev: pointer to hl_device structure
898  * Return: 0 on success, negative value on failure.
899  */
900 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
901 {
902 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
903 	struct asic_fixed_properties *prop = &hdev->asic_prop;
904 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
905 	int rc;
906 
907 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
908 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
909 		struct gaudi_device *gaudi = hdev->asic_specific;
910 
911 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
912 			return 0;
913 
914 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
915 
916 		if (rc)
917 			return rc;
918 
919 		freq = pll_freq_arr[2];
920 	} else {
921 		/* Backward compatibility */
922 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
923 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
924 		nr = RREG32(mmPSOC_CPU_PLL_NR);
925 		nf = RREG32(mmPSOC_CPU_PLL_NF);
926 		od = RREG32(mmPSOC_CPU_PLL_OD);
927 
928 		if (div_sel == DIV_SEL_REF_CLK ||
929 				div_sel == DIV_SEL_DIVIDED_REF) {
930 			if (div_sel == DIV_SEL_REF_CLK)
931 				freq = PLL_REF_CLK;
932 			else
933 				freq = PLL_REF_CLK / (div_fctr + 1);
934 		} else if (div_sel == DIV_SEL_PLL_CLK ||
935 			div_sel == DIV_SEL_DIVIDED_PLL) {
936 			pll_clk = PLL_REF_CLK * (nf + 1) /
937 					((nr + 1) * (od + 1));
938 			if (div_sel == DIV_SEL_PLL_CLK)
939 				freq = pll_clk;
940 			else
941 				freq = pll_clk / (div_fctr + 1);
942 		} else {
943 			dev_warn(hdev->dev, "Received invalid div select value: %#x\n", div_sel);
944 			freq = 0;
945 		}
946 	}
947 
948 	prop->psoc_timestamp_frequency = freq;
949 	prop->psoc_pci_pll_nr = nr;
950 	prop->psoc_pci_pll_nf = nf;
951 	prop->psoc_pci_pll_od = od;
952 	prop->psoc_pci_pll_div_factor = div_fctr;
953 
954 	return 0;
955 }
956 
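/* Build a single LIN_DMA packet CB that copies the TPC kernel image from host
 * memory to the SRAM user base (rounded up to 8KB), submit it on QMAN0 and
 * then run the kernel on every TPC engine.
 */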
957 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
958 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
959 {
960 	struct asic_fixed_properties *prop = &hdev->asic_prop;
961 	struct packet_lin_dma *init_tpc_mem_pkt;
962 	struct hl_cs_job *job;
963 	struct hl_cb *cb;
964 	u64 dst_addr;
965 	u32 cb_size, ctl;
966 	u8 tpc_id;
967 	int rc;
968 
969 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
970 	if (!cb)
971 		return -EFAULT;
972 
973 	init_tpc_mem_pkt = cb->kernel_address;
974 	cb_size = sizeof(*init_tpc_mem_pkt);
975 	memset(init_tpc_mem_pkt, 0, cb_size);
976 
977 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
978 
979 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
980 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
981 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
982 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
983 
984 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
985 
986 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
987 
988 	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
989 	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
990 				round_up(prop->sram_user_base_address, SZ_8K));
991 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
992 
993 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
994 	if (!job) {
995 		dev_err(hdev->dev, "Failed to allocate a new job\n");
996 		rc = -ENOMEM;
997 		goto release_cb;
998 	}
999 
1000 	job->id = 0;
1001 	job->user_cb = cb;
1002 	atomic_inc(&job->user_cb->cs_cnt);
1003 	job->user_cb_size = cb_size;
1004 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1005 	job->patched_cb = job->user_cb;
1006 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1007 
1008 	hl_debugfs_add_job(hdev, job);
1009 
1010 	rc = gaudi_send_job_on_qman0(hdev, job);
1011 
1012 	if (rc)
1013 		goto free_job;
1014 
1015 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1016 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1017 		if (rc)
1018 			break;
1019 	}
1020 
1021 free_job:
1022 	hl_userptr_delete_list(hdev, &job->userptr_list);
1023 	hl_debugfs_remove_job(hdev, job);
1024 	kfree(job);
1025 	atomic_dec(&cb->cs_cnt);
1026 
1027 release_cb:
1028 	hl_cb_put(cb);
1029 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1030 
1031 	return rc;
1032 }
1033 
1034 /*
1035  * gaudi_init_tpc_mem() - Initialize TPC memories.
1036  * @hdev: Pointer to hl_device structure.
1037  *
1038  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1039  *
1040  * Return: 0 for success, negative value for error.
1041  */
1042 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1043 {
1044 	const struct firmware *fw;
1045 	size_t fw_size;
1046 	void *cpu_addr;
1047 	dma_addr_t dma_handle;
1048 	int rc, count = 5;
1049 
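	/* request_firmware() may be interrupted by a signal (-EINTR); retry a few times */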
1050 again:
1051 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1052 	if (rc == -EINTR && count-- > 0) {
1053 		msleep(50);
1054 		goto again;
1055 	}
1056 
1057 	if (rc) {
1058 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1059 				GAUDI_TPC_FW_FILE);
1060 		goto out;
1061 	}
1062 
1063 	fw_size = fw->size;
1064 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1065 	if (!cpu_addr) {
1066 		dev_err(hdev->dev,
1067 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1068 			fw_size);
1069 		rc = -ENOMEM;
1070 		goto out;
1071 	}
1072 
1073 	memcpy(cpu_addr, fw->data, fw_size);
1074 
1075 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1076 
1077 	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1078 
1079 out:
1080 	release_firmware(fw);
1081 	return rc;
1082 }
1083 
1084 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1085 {
1086 	struct gaudi_device *gaudi = hdev->asic_specific;
1087 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1088 	struct hl_hw_queue *q;
1089 	u32 i, sob_id, sob_group_id, queue_id;
1090 
1091 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1092 	sob_group_id =
1093 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1094 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1095 
1096 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1097 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1098 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1099 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1100 	}
1101 
1102 	/* Both DMA5 and TPC7 use the same resources since only a single
1103 	 * engine needs to participate in the reduction process
1104 	 */
1105 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1106 	q = &hdev->kernel_queues[queue_id];
1107 	q->sync_stream_prop.collective_sob_id =
1108 			sob_id + NIC_NUMBER_OF_ENGINES;
1109 
1110 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1111 	q = &hdev->kernel_queues[queue_id];
1112 	q->sync_stream_prop.collective_sob_id =
1113 			sob_id + NIC_NUMBER_OF_ENGINES;
1114 }
1115 
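/* kref release callback: clear every SOB in the group by writing 0 to its
 * sync manager register, then re-init the refcount so the group can be reused.
 */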
1116 static void gaudi_sob_group_hw_reset(struct kref *ref)
1117 {
1118 	struct gaudi_hw_sob_group *hw_sob_group =
1119 		container_of(ref, struct gaudi_hw_sob_group, kref);
1120 	struct hl_device *hdev = hw_sob_group->hdev;
1121 	int i;
1122 
1123 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1124 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1125 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1126 
1127 	kref_init(&hw_sob_group->kref);
1128 }
1129 
1130 static void gaudi_sob_group_reset_error(struct kref *ref)
1131 {
1132 	struct gaudi_hw_sob_group *hw_sob_group =
1133 		container_of(ref, struct gaudi_hw_sob_group, kref);
1134 	struct hl_device *hdev = hw_sob_group->hdev;
1135 
1136 	dev_crit(hdev->dev,
1137 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1138 		hw_sob_group->base_sob_id);
1139 }
1140 
1141 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1142 {
1143 	struct gaudi_collective_properties *prop;
1144 	int i;
1145 
1146 	prop = &gaudi->collective_props;
1147 
1148 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1149 
1150 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1151 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1152 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1153 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1154 	/* Set collective engine bit (i == NIC_NUMBER_OF_ENGINES here) */
1155 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1156 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1157 }
1158 
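/* Carve the reserved collective SOB range into per-group chunks (aligned to
 * HL_MAX_SOBS_PER_MONITOR), reset every group and map the groups of each
 * stream to the NIC and reduction-engine slave queues.
 */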
1159 static int gaudi_collective_init(struct hl_device *hdev)
1160 {
1161 	u32 i, sob_id, reserved_sobs_per_group;
1162 	struct gaudi_collective_properties *prop;
1163 	struct gaudi_device *gaudi;
1164 
1165 	gaudi = hdev->asic_specific;
1166 	prop = &gaudi->collective_props;
1167 	sob_id = hdev->asic_prop.collective_first_sob;
1168 
1169 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1170 	reserved_sobs_per_group =
1171 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1172 
1173 	/* Init SOB groups */
1174 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1175 		prop->hw_sob_group[i].hdev = hdev;
1176 		prop->hw_sob_group[i].base_sob_id = sob_id;
1177 		sob_id += reserved_sobs_per_group;
1178 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1179 	}
1180 
1181 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1182 		prop->next_sob_group_val[i] = 1;
1183 		prop->curr_sob_group_idx[i] = 0;
1184 		gaudi_collective_map_sobs(hdev, i);
1185 	}
1186 
1187 	gaudi_collective_mstr_sob_mask_set(gaudi);
1188 
1189 	return 0;
1190 }
1191 
1192 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1193 {
1194 	struct gaudi_device *gaudi = hdev->asic_specific;
1195 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1196 
1197 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1198 					gaudi_sob_group_hw_reset);
1199 }
1200 
1201 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1202 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1203 {
1204 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1205 	struct gaudi_collective_properties *cprop;
1206 	struct hl_gen_wait_properties wait_prop;
1207 	struct hl_sync_stream_properties *prop;
1208 	struct gaudi_device *gaudi;
1209 
1210 	gaudi = hdev->asic_specific;
1211 	cprop = &gaudi->collective_props;
1212 	queue_id = job->hw_queue_id;
1213 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1214 
1215 	master_sob_base =
1216 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1217 	master_monitor = prop->collective_mstr_mon_id[0];
1218 
1219 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1220 
1221 	dev_dbg(hdev->dev,
1222 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1223 		master_sob_base, cprop->mstr_sob_mask[0],
1224 		cprop->next_sob_group_val[stream],
1225 		master_monitor, queue_id);
1226 
1227 	wait_prop.data = (void *) job->patched_cb;
1228 	wait_prop.sob_base = master_sob_base;
1229 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1230 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1231 	wait_prop.mon_id = master_monitor;
1232 	wait_prop.q_idx = queue_id;
1233 	wait_prop.size = cb_size;
1234 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1235 
1236 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1237 	master_monitor = prop->collective_mstr_mon_id[1];
1238 
1239 	dev_dbg(hdev->dev,
1240 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1241 		master_sob_base, cprop->mstr_sob_mask[1],
1242 		cprop->next_sob_group_val[stream],
1243 		master_monitor, queue_id);
1244 
1245 	wait_prop.sob_base = master_sob_base;
1246 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1247 	wait_prop.mon_id = master_monitor;
1248 	wait_prop.size = cb_size;
1249 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1250 }
1251 
1252 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1253 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1254 {
1255 	struct hl_gen_wait_properties wait_prop;
1256 	struct hl_sync_stream_properties *prop;
1257 	u32 queue_id, cb_size = 0;
1258 
1259 	queue_id = job->hw_queue_id;
1260 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1261 
1262 	if (job->cs->encaps_signals) {
1263 		/* use the encaps signal handle stored earlier in the flow
1264 		 * and set the SOB information from the encaps
1265 		 * signals handle
1266 		 */
1267 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1268 						cs_cmpl);
1269 
1270 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1271 				job->cs->sequence,
1272 				cs_cmpl->hw_sob->sob_id,
1273 				cs_cmpl->sob_val);
1274 	}
1275 
1276 	/* Add to wait CBs using slave monitor */
1277 	wait_prop.data = (void *) job->user_cb;
1278 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1279 	wait_prop.sob_mask = 0x1;
1280 	wait_prop.sob_val = cs_cmpl->sob_val;
1281 	wait_prop.mon_id = prop->collective_slave_mon_id;
1282 	wait_prop.q_idx = queue_id;
1283 	wait_prop.size = cb_size;
1284 
1285 	dev_dbg(hdev->dev,
1286 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1287 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1288 		prop->collective_slave_mon_id, queue_id);
1289 
1290 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1291 
1292 	dev_dbg(hdev->dev,
1293 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1294 		prop->collective_sob_id, queue_id);
1295 
1296 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1297 			prop->collective_sob_id, cb_size, false);
1298 }
1299 
1300 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1301 {
1302 	struct hl_cs_compl *signal_cs_cmpl =
1303 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1304 	struct hl_cs_compl *cs_cmpl =
1305 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1306 	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1307 	struct gaudi_collective_properties *cprop;
1308 	u32 stream, queue_id, sob_group_offset;
1309 	struct gaudi_device *gaudi;
1310 	struct hl_device *hdev;
1311 	struct hl_cs_job *job;
1312 	struct hl_ctx *ctx;
1313 
1314 	ctx = cs->ctx;
1315 	hdev = ctx->hdev;
1316 	gaudi = hdev->asic_specific;
1317 	cprop = &gaudi->collective_props;
1318 
1319 	if (cs->encaps_signals) {
1320 		cs_cmpl->hw_sob = handle->hw_sob;
1321 		/* at this checkpoint we only need the hw_sob pointer
1322 		 * for the completion check before going over the jobs
1323 		 * of the master/slaves. The sob_value will be taken later on
1324 		 * in gaudi_collective_slave_init_job, depending on each
1325 		 * job's wait offset value.
1326 		 */
1327 		cs_cmpl->sob_val = 0;
1328 	} else {
1329 		/* copy the SOB id and value of the signal CS */
1330 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1331 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1332 	}
1333 
1334 	/* check again if the signal cs already completed.
1335 	 * if yes then don't send any wait cs since the hw_sob
1336 	 * could be in reset already. if signal is not completed
1337 	 * then get refcount to hw_sob to prevent resetting the sob
1338 	 * while wait cs is not submitted.
1339 	 * note that this check is protected by two locks,
1340 	 * hw queue lock and completion object lock,
1341 	 * and the same completion object lock also protects
1342 	 * the hw_sob reset handler function.
1343 	 * The hw_queue lock prevents the hw_sob refcount value, which is
1344 	 * changed by the signal/wait flows, from going out of sync.
1345 	 */
1346 	spin_lock(&signal_cs_cmpl->lock);
1347 
1348 	if (completion_done(&cs->signal_fence->completion)) {
1349 		spin_unlock(&signal_cs_cmpl->lock);
1350 		return -EINVAL;
1351 	}
1352 	/* Increment kref since all slave queues are now waiting on it */
1353 	kref_get(&cs_cmpl->hw_sob->kref);
1354 
1355 	spin_unlock(&signal_cs_cmpl->lock);
1356 
1357 	/* Calculate the stream from collective master queue (1st job) */
1358 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1359 	stream = job->hw_queue_id % 4;
1360 	sob_group_offset =
1361 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1362 
1363 	list_for_each_entry(job, &cs->job_list, cs_node) {
1364 		queue_id = job->hw_queue_id;
1365 
1366 		if (hdev->kernel_queues[queue_id].collective_mode ==
1367 				HL_COLLECTIVE_MASTER)
1368 			gaudi_collective_master_init_job(hdev, job, stream,
1369 						sob_group_offset);
1370 		else
1371 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1372 	}
1373 
1374 	cs_cmpl->sob_group = sob_group_offset;
1375 
1376 	/* Handle sob group kref and wraparound */
1377 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1378 	cprop->next_sob_group_val[stream]++;
1379 
1380 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1381 		/*
1382 		 * Decrement as we reached the max value.
1383 		 * The release function won't be called here as we've
1384 		 * just incremented the refcount.
1385 		 */
1386 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1387 				gaudi_sob_group_reset_error);
1388 		cprop->next_sob_group_val[stream] = 1;
1389 		/* only two SOBs are currently in use */
1390 		cprop->curr_sob_group_idx[stream] =
1391 			(cprop->curr_sob_group_idx[stream] + 1) &
1392 							(HL_RSVD_SOBS - 1);
1393 
1394 		gaudi_collective_map_sobs(hdev, stream);
1395 
1396 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1397 				cprop->curr_sob_group_idx[stream], stream);
1398 	}
1399 
1400 	mb();
1401 	hl_fence_put(cs->signal_fence);
1402 	cs->signal_fence = NULL;
1403 
1404 	return 0;
1405 }
1406 
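/* Two MSG_PROT packets (completion + MSI) are appended to every patched CB.
 * If they don't fit in the tail of the user CB's last device cache line, the
 * CB is padded up to the next cache-line boundary before appending them.
 */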
1407 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1408 {
1409 	u32 cacheline_end, additional_commands;
1410 
1411 	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1412 	additional_commands = sizeof(struct packet_msg_prot) * 2;
1413 
1414 	if (user_cb_size + additional_commands > cacheline_end)
1415 		return cacheline_end - user_cb_size + additional_commands;
1416 	else
1417 		return additional_commands;
1418 }
1419 
1420 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1421 		struct hl_ctx *ctx, struct hl_cs *cs,
1422 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1423 		u32 encaps_signal_offset)
1424 {
1425 	struct hw_queue_properties *hw_queue_prop;
1426 	struct hl_cs_counters_atomic *cntr;
1427 	struct hl_cs_job *job;
1428 	struct hl_cb *cb;
1429 	u32 cb_size;
1430 	bool patched_cb;
1431 
1432 	cntr = &hdev->aggregated_cs_counters;
1433 
1434 	if (mode == HL_COLLECTIVE_MASTER) {
1435 		/* CB size of collective master queue contains
1436 		 * 4 msg short packets for monitor 1 configuration
1437 		 * 1 fence packet
1438 		 * 4 msg short packets for monitor 2 configuration
1439 		 * 1 fence packet
1440 		 * 2 msg prot packets for completion and MSI
1441 		 */
1442 		cb_size = sizeof(struct packet_msg_short) * 8 +
1443 				sizeof(struct packet_fence) * 2 +
1444 				sizeof(struct packet_msg_prot) * 2;
1445 		patched_cb = true;
1446 	} else {
1447 		/* CB size of collective slave queues contains
1448 		 * 4 msg short packets for monitor configuration
1449 		 * 1 fence packet
1450 		 * 1 additional msg short packet for sob signal
1451 		 */
1452 		cb_size = sizeof(struct packet_msg_short) * 5 +
1453 				sizeof(struct packet_fence);
1454 		patched_cb = false;
1455 	}
1456 
1457 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1458 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1459 	if (!job) {
1460 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1461 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1462 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1463 		return -ENOMEM;
1464 	}
1465 
1466 	/* Allocate an internal mapped CB for non-patched CBs */
1467 	cb = hl_cb_kernel_create(hdev, cb_size,
1468 			hdev->mmu_enable && !patched_cb);
1469 	if (!cb) {
1470 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1471 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1472 		kfree(job);
1473 		return -EFAULT;
1474 	}
1475 
1476 	job->id = 0;
1477 	job->cs = cs;
1478 	job->user_cb = cb;
1479 	atomic_inc(&job->user_cb->cs_cnt);
1480 	job->user_cb_size = cb_size;
1481 	job->hw_queue_id = queue_id;
1482 
1483 	/* since it's guaranteed to have only one chunk in the collective wait
1484 	 * cs, we can use this chunk to set the encapsulated signal offset
1485 	 * in the jobs.
1486 	 */
1487 	if (cs->encaps_signals)
1488 		job->encaps_sig_wait_offset = encaps_signal_offset;
1489 
1490 	/*
1491 	 * No need for parsing, the user CB is the patched CB.
1492 	 * We call hl_cb_destroy() for two reasons - we don't need
1493 	 * the CB in the CB idr anymore, and we need to decrement its refcount as
1494 	 * it was incremented inside hl_cb_kernel_create().
1495 	 */
1496 	if (patched_cb)
1497 		job->patched_cb = job->user_cb;
1498 	else
1499 		job->patched_cb = NULL;
1500 
1501 	job->job_cb_size = job->user_cb_size;
1502 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1503 
1504 	/* increment refcount as for external queues we get completion */
1505 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1506 		cs_get(cs);
1507 
1508 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1509 
1510 	list_add_tail(&job->cs_node, &cs->job_list);
1511 
1512 	hl_debugfs_add_job(hdev, job);
1513 
1514 	return 0;
1515 }
1516 
1517 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1518 		struct hl_ctx *ctx, struct hl_cs *cs,
1519 		u32 wait_queue_id, u32 collective_engine_id,
1520 		u32 encaps_signal_offset)
1521 {
1522 	struct gaudi_device *gaudi = hdev->asic_specific;
1523 	struct hw_queue_properties *hw_queue_prop;
1524 	u32 queue_id, collective_queue, num_jobs;
1525 	u32 stream, nic_queue, nic_idx = 0;
1526 	bool skip;
1527 	int i, rc = 0;
1528 
1529 	/* Verify wait queue id is configured as master */
1530 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1531 	if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1532 		dev_err(hdev->dev,
1533 			"Queue %d is not configured as collective master\n",
1534 			wait_queue_id);
1535 		return -EINVAL;
1536 	}
1537 
1538 	/* Verify engine id is supported */
1539 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1540 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1541 		dev_err(hdev->dev,
1542 			"Collective wait does not support engine %u\n",
1543 			collective_engine_id);
1544 		return -EINVAL;
1545 	}
1546 
1547 	stream = wait_queue_id % 4;
1548 
1549 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1550 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1551 	else
1552 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1553 
1554 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1555 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1556 
1557 	/* The first job goes to the collective master queue; it will wait for
1558 	 * the collective slave queues to finish execution.
1559 	 * The synchronization is done using two monitors:
1560 	 * the first monitor covers NICs 0-7, the second covers NICs 8-9 and
1561 	 * the reduction engine (DMA5/TPC7).
1562 	 *
1563 	 * The rest of the jobs go to the collective slave queues, which all
1564 	 * wait for the user to signal SOB 'cs_cmpl->sob_val'.
1565 	 */
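	/* num_jobs covers the single master job plus one slave job per SOB in
	 * the group (presumably one for each NIC engine and one for the
	 * reduction engine). NIC queues whose engine was not initialized,
	 * e.g. because its port is masked out, are skipped below.
	 */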
1566 	for (i = 0 ; i < num_jobs ; i++) {
1567 		if (i == 0) {
1568 			queue_id = wait_queue_id;
1569 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1570 				HL_COLLECTIVE_MASTER, queue_id,
1571 				wait_queue_id, encaps_signal_offset);
1572 		} else {
1573 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1574 				if (gaudi->hw_cap_initialized &
1575 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1576 					skip = false;
1577 				else
1578 					skip = true;
1579 
1580 				queue_id = nic_queue;
1581 				nic_queue += 4;
1582 				nic_idx++;
1583 
1584 				if (skip)
1585 					continue;
1586 			} else {
1587 				queue_id = collective_queue;
1588 			}
1589 
1590 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1591 				HL_COLLECTIVE_SLAVE, queue_id,
1592 				wait_queue_id, encaps_signal_offset);
1593 		}
1594 
1595 		if (rc)
1596 			return rc;
1597 	}
1598 
1599 	return rc;
1600 }
1601 
1602 static int gaudi_late_init(struct hl_device *hdev)
1603 {
1604 	struct gaudi_device *gaudi = hdev->asic_specific;
1605 	int rc;
1606 
1607 	rc = gaudi->cpucp_info_get(hdev);
1608 	if (rc) {
1609 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1610 		return rc;
1611 	}
1612 
1613 	if ((hdev->card_type == cpucp_card_type_pci) &&
1614 			(hdev->nic_ports_mask & 0x3)) {
1615 		dev_info(hdev->dev,
1616 			"PCI card detected, only 8 ports are enabled\n");
1617 		hdev->nic_ports_mask &= ~0x3;
1618 
1619 		/* Stop and disable unused NIC QMANs */
1620 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1621 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1622 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1623 
1624 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1625 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1626 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1627 
1628 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1629 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1630 
1631 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1632 	}
1633 
1634 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1635 	if (rc) {
1636 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1637 		return rc;
1638 	}
1639 
1640 	/* Scrub both SRAM and DRAM */
1641 	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1642 	if (rc)
1643 		goto disable_pci_access;
1644 
1645 	rc = gaudi_fetch_psoc_frequency(hdev);
1646 	if (rc) {
1647 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1648 		goto disable_pci_access;
1649 	}
1650 
1651 	rc = gaudi_mmu_clear_pgt_range(hdev);
1652 	if (rc) {
1653 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1654 		goto disable_pci_access;
1655 	}
1656 
1657 	rc = gaudi_init_tpc_mem(hdev);
1658 	if (rc) {
1659 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1660 		goto disable_pci_access;
1661 	}
1662 
1663 	rc = gaudi_collective_init(hdev);
1664 	if (rc) {
1665 		dev_err(hdev->dev, "Failed to init collective\n");
1666 		goto disable_pci_access;
1667 	}
1668 
1669 	/* We only support a single ASID for the user, so for the sake of optimization, just
1670 	 * initialize the ASID one time during device initialization with the fixed value of 1
1671 	 */
1672 	gaudi_mmu_prepare(hdev, 1);
1673 
1674 	hl_fw_set_pll_profile(hdev);
1675 
1676 	return 0;
1677 
1678 disable_pci_access:
1679 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1680 
1681 	return rc;
1682 }
1683 
1684 static void gaudi_late_fini(struct hl_device *hdev)
1685 {
1686 	hl_hwmon_release_resources(hdev);
1687 }
1688 
1689 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1690 {
1691 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1692 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1693 	int i, j, rc = 0;
1694 
1695 	/*
1696 	 * The device CPU works with 40-bit addresses, while bit 39 must be set
1697 	 * to '1' when accessing the host.
1698 	 * Bits 49:39 of the full host address are saved for a later
1699 	 * configuration of the HW to perform extension to 50 bits.
1700 	 * Because there is a single HW register that holds the extension bits,
1701 	 * these bits must be identical across the entire allocated range.
1702 	 */
1703 
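	/* In practice this means the allocation must not cross a 2^39-byte
	 * boundary, so that bits 49:39 stay constant across the whole buffer;
	 * hence the allocation is retried a few times until such a buffer is
	 * obtained.
	 */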
1704 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1705 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1706 								&dma_addr_arr[i],
1707 								GFP_KERNEL | __GFP_ZERO);
1708 		if (!virt_addr_arr[i]) {
1709 			rc = -ENOMEM;
1710 			goto free_dma_mem_arr;
1711 		}
1712 
1713 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1714 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1715 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1716 			break;
1717 	}
1718 
1719 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1720 		dev_err(hdev->dev,
1721 			"MSB of CPU accessible DMA memory are not identical in all range\n");
1722 		rc = -EFAULT;
1723 		goto free_dma_mem_arr;
1724 	}
1725 
1726 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1727 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1728 	hdev->cpu_pci_msb_addr =
1729 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1730 
1731 	if (!hdev->asic_prop.fw_security_enabled)
1732 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1733 
1734 free_dma_mem_arr:
1735 	for (j = 0 ; j < i ; j++)
1736 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1737 						dma_addr_arr[j]);
1738 
1739 	return rc;
1740 }
1741 
1742 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1743 {
1744 	struct gaudi_device *gaudi = hdev->asic_specific;
1745 	struct gaudi_internal_qman_info *q;
1746 	u32 i;
1747 
1748 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1749 		q = &gaudi->internal_qmans[i];
1750 		if (!q->pq_kernel_addr)
1751 			continue;
1752 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1753 	}
1754 }
1755 
1756 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1757 {
1758 	struct gaudi_device *gaudi = hdev->asic_specific;
1759 	struct gaudi_internal_qman_info *q;
1760 	int rc, i;
1761 
1762 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1763 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1764 			continue;
1765 
1766 		q = &gaudi->internal_qmans[i];
1767 
1768 		switch (i) {
1769 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1770 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1771 			break;
1772 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1773 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1774 			break;
1775 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1776 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1777 			break;
1778 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1779 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1780 			break;
1781 		default:
1782 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1783 			rc = -EINVAL;
1784 			goto free_internal_qmans_pq_mem;
1785 		}
1786 
1787 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1788 								GFP_KERNEL | __GFP_ZERO);
1789 		if (!q->pq_kernel_addr) {
1790 			rc = -ENOMEM;
1791 			goto free_internal_qmans_pq_mem;
1792 		}
1793 	}
1794 
1795 	return 0;
1796 
1797 free_internal_qmans_pq_mem:
1798 	gaudi_free_internal_qmans_pq_mem(hdev);
1799 	return rc;
1800 }
1801 
1802 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1803 {
1804 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1805 	struct pci_mem_region *region;
1806 
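	/* Both the CFG and SP SRAM regions are exposed through the CFG BAR,
	 * whose mapping apparently starts at SPI_FLASH_BASE_ADDR; hence the
	 * offset_in_bar values below are computed relative to that address.
	 */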
1807 	/* CFG */
1808 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1809 	region->region_base = CFG_BASE;
1810 	region->region_size = CFG_SIZE;
1811 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1812 	region->bar_size = CFG_BAR_SIZE;
1813 	region->bar_id = CFG_BAR_ID;
1814 	region->used = 1;
1815 
1816 	/* SRAM */
1817 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1818 	region->region_base = SRAM_BASE_ADDR;
1819 	region->region_size = SRAM_SIZE;
1820 	region->offset_in_bar = 0;
1821 	region->bar_size = SRAM_BAR_SIZE;
1822 	region->bar_id = SRAM_BAR_ID;
1823 	region->used = 1;
1824 
1825 	/* DRAM */
1826 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1827 	region->region_base = DRAM_PHYS_BASE;
1828 	region->region_size = hdev->asic_prop.dram_size;
1829 	region->offset_in_bar = 0;
1830 	region->bar_size = prop->dram_pci_bar_size;
1831 	region->bar_id = HBM_BAR_ID;
1832 	region->used = 1;
1833 
1834 	/* SP SRAM */
1835 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1836 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1837 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1838 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1839 	region->bar_size = CFG_BAR_SIZE;
1840 	region->bar_id = CFG_BAR_ID;
1841 	region->used = 1;
1842 }
1843 
1844 static int gaudi_sw_init(struct hl_device *hdev)
1845 {
1846 	struct gaudi_device *gaudi;
1847 	u32 i, event_id = 0;
1848 	int rc;
1849 
1850 	/* Allocate device structure */
1851 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1852 	if (!gaudi)
1853 		return -ENOMEM;
1854 
1855 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1856 		if (gaudi_irq_map_table[i].valid) {
1857 			if (event_id == GAUDI_EVENT_SIZE) {
1858 				dev_err(hdev->dev,
1859 					"Event array exceeds the limit of %u events\n",
1860 					GAUDI_EVENT_SIZE);
1861 				rc = -EINVAL;
1862 				goto free_gaudi_device;
1863 			}
1864 
1865 			gaudi->events[event_id++] =
1866 					gaudi_irq_map_table[i].fc_id;
1867 		}
1868 	}
1869 
1870 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1871 
1872 	hdev->asic_specific = gaudi;
1873 
1874 	/* Create DMA pool for small allocations */
1875 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1876 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1877 	if (!hdev->dma_pool) {
1878 		dev_err(hdev->dev, "failed to create DMA pool\n");
1879 		rc = -ENOMEM;
1880 		goto free_gaudi_device;
1881 	}
1882 
1883 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1884 	if (rc)
1885 		goto free_dma_pool;
1886 
1887 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1888 	if (!hdev->cpu_accessible_dma_pool) {
1889 		dev_err(hdev->dev,
1890 			"Failed to create CPU accessible DMA pool\n");
1891 		rc = -ENOMEM;
1892 		goto free_cpu_dma_mem;
1893 	}
1894 
1895 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1896 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1897 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1898 	if (rc) {
1899 		dev_err(hdev->dev,
1900 			"Failed to add memory to CPU accessible DMA pool\n");
1901 		rc = -EFAULT;
1902 		goto free_cpu_accessible_dma_pool;
1903 	}
1904 
1905 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1906 	if (rc)
1907 		goto free_cpu_accessible_dma_pool;
1908 
1909 	spin_lock_init(&gaudi->hw_queues_lock);
1910 
1911 	hdev->supports_sync_stream = true;
1912 	hdev->supports_coresight = true;
1913 	hdev->supports_staged_submission = true;
1914 	hdev->supports_wait_for_multi_cs = true;
1915 
1916 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1917 	hdev->stream_master_qid_arr =
1918 				hdev->asic_funcs->get_stream_master_qid_arr();
1919 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1920 
1921 	return 0;
1922 
1923 free_cpu_accessible_dma_pool:
1924 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1925 free_cpu_dma_mem:
1926 	if (!hdev->asic_prop.fw_security_enabled)
1927 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1928 					hdev->cpu_pci_msb_addr);
1929 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1930 					hdev->cpu_accessible_dma_address);
1931 free_dma_pool:
1932 	dma_pool_destroy(hdev->dma_pool);
1933 free_gaudi_device:
1934 	kfree(gaudi);
1935 	return rc;
1936 }
1937 
1938 static int gaudi_sw_fini(struct hl_device *hdev)
1939 {
1940 	struct gaudi_device *gaudi = hdev->asic_specific;
1941 
1942 	gaudi_free_internal_qmans_pq_mem(hdev);
1943 
1944 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1945 
1946 	if (!hdev->asic_prop.fw_security_enabled)
1947 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1948 					hdev->cpu_pci_msb_addr);
1949 
1950 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1951 					hdev->cpu_accessible_dma_address);
1952 
1953 	dma_pool_destroy(hdev->dma_pool);
1954 
1955 	kfree(gaudi);
1956 
1957 	return 0;
1958 }
1959 
1960 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1961 {
1962 	struct hl_device *hdev = arg;
1963 	int i;
1964 
1965 	if (hdev->disabled)
1966 		return IRQ_HANDLED;
1967 
1968 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1969 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1970 
1971 	hl_irq_handler_eq(irq, &hdev->event_queue);
1972 
1973 	return IRQ_HANDLED;
1974 }
1975 
1976 /*
1977  * For backward compatibility, new MSI interrupts should be set after the
1978  * existing CPU and NIC interrupts.
1979  */
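/*
 * The assumed MSI vector layout is: vectors 0..(GAUDI_EVENT_QUEUE_MSI_IDX - 1)
 * for the completion queues, vector GAUDI_EVENT_QUEUE_MSI_IDX for the CPU
 * event queue, followed by the NIC interrupts. Therefore any new MSI index
 * above the CPU EQ index is translated to nr + NIC_NUMBER_OF_ENGINES + 1.
 */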
1980 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1981 				bool cpu_eq)
1982 {
1983 	int msi_vec;
1984 
1985 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1986 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1987 				GAUDI_EVENT_QUEUE_MSI_IDX);
1988 
1989 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1990 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1991 
1992 	return pci_irq_vector(hdev->pdev, msi_vec);
1993 }
1994 
1995 static int gaudi_enable_msi_single(struct hl_device *hdev)
1996 {
1997 	int rc, irq;
1998 
1999 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2000 
2001 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2002 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2003 			"gaudi single msi", hdev);
2004 	if (rc)
2005 		dev_err(hdev->dev,
2006 			"Failed to request single MSI IRQ\n");
2007 
2008 	return rc;
2009 }
2010 
2011 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2012 {
2013 	int cq_cnt = hdev->asic_prop.completion_queues_count;
2014 	int rc, i, irq_cnt_init, irq;
2015 
2016 	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2017 		irq = gaudi_pci_irq_vector(hdev, i, false);
2018 		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2019 				&hdev->completion_queue[i]);
2020 		if (rc) {
2021 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2022 			goto free_irqs;
2023 		}
2024 	}
2025 
2026 	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2027 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2028 				&hdev->event_queue);
2029 	if (rc) {
2030 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2031 		goto free_irqs;
2032 	}
2033 
2034 	return 0;
2035 
2036 free_irqs:
2037 	for (i = 0 ; i < irq_cnt_init ; i++)
2038 		free_irq(gaudi_pci_irq_vector(hdev, i, false),
2039 				&hdev->completion_queue[i]);
2040 	return rc;
2041 }
2042 
2043 static int gaudi_enable_msi(struct hl_device *hdev)
2044 {
2045 	struct gaudi_device *gaudi = hdev->asic_specific;
2046 	int rc;
2047 
2048 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2049 		return 0;
2050 
2051 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2052 	if (rc < 0) {
2053 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2054 		return rc;
2055 	}
2056 
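	/* Note that the minimum and maximum vector counts requested above are
	 * both 1, so on success rc is 1, which is presumably smaller than
	 * NUMBER_OF_INTERRUPTS, and the single-MSI path below is the one that
	 * is effectively taken.
	 */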
2057 	if (rc < NUMBER_OF_INTERRUPTS) {
2058 		gaudi->multi_msi_mode = false;
2059 		rc = gaudi_enable_msi_single(hdev);
2060 	} else {
2061 		gaudi->multi_msi_mode = true;
2062 		rc = gaudi_enable_msi_multi(hdev);
2063 	}
2064 
2065 	if (rc)
2066 		goto free_pci_irq_vectors;
2067 
2068 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2069 
2070 	return 0;
2071 
2072 free_pci_irq_vectors:
2073 	pci_free_irq_vectors(hdev->pdev);
2074 	return rc;
2075 }
2076 
2077 static void gaudi_sync_irqs(struct hl_device *hdev)
2078 {
2079 	struct gaudi_device *gaudi = hdev->asic_specific;
2080 	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2081 
2082 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2083 		return;
2084 
2085 	/* Wait for all pending IRQs to be finished */
2086 	if (gaudi->multi_msi_mode) {
2087 		for (i = 0 ; i < cq_cnt ; i++)
2088 			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2089 
2090 		synchronize_irq(gaudi_pci_irq_vector(hdev,
2091 						GAUDI_EVENT_QUEUE_MSI_IDX,
2092 						true));
2093 	} else {
2094 		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2095 	}
2096 }
2097 
2098 static void gaudi_disable_msi(struct hl_device *hdev)
2099 {
2100 	struct gaudi_device *gaudi = hdev->asic_specific;
2101 	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2102 
2103 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2104 		return;
2105 
2106 	gaudi_sync_irqs(hdev);
2107 
2108 	if (gaudi->multi_msi_mode) {
2109 		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2110 						true);
2111 		free_irq(irq, &hdev->event_queue);
2112 
2113 		for (i = 0 ; i < cq_cnt ; i++) {
2114 			irq = gaudi_pci_irq_vector(hdev, i, false);
2115 			free_irq(irq, &hdev->completion_queue[i]);
2116 		}
2117 	} else {
2118 		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2119 	}
2120 
2121 	pci_free_irq_vectors(hdev->pdev);
2122 
2123 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2124 }
2125 
2126 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2127 {
2128 	struct gaudi_device *gaudi = hdev->asic_specific;
2129 
2130 	if (hdev->asic_prop.fw_security_enabled)
2131 		return;
2132 
2133 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2134 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2135 		return;
2136 
2137 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2138 		return;
2139 
2140 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2141 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2142 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2143 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2144 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2145 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2146 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2147 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2148 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2149 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2150 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2151 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2152 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2153 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2154 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2155 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2156 
2157 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2158 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2159 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2160 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2161 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2162 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2163 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2164 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2165 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2166 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2167 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2168 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2169 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2170 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2171 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2172 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2173 
2174 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2175 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2176 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2177 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2178 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2179 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2180 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2181 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2182 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2183 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2184 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2185 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2186 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2187 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2188 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2189 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2190 
2191 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2192 }
2193 
2194 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2195 {
2196 	struct gaudi_device *gaudi = hdev->asic_specific;
2197 
2198 	if (hdev->asic_prop.fw_security_enabled)
2199 		return;
2200 
2201 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2202 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2203 		return;
2204 
2205 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2206 		return;
2207 
2208 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2209 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2211 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2213 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2215 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2217 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2219 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2221 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2223 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224 
2225 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2226 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2227 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2228 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2229 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2230 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2231 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2232 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2233 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2234 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2235 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2236 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2237 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2238 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2239 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2240 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2241 
2242 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2243 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2244 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2245 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2246 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2247 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2248 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2249 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2250 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2251 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2252 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2253 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2254 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2255 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2256 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2257 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2258 
2259 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2260 }
2261 
2262 static void gaudi_init_e2e(struct hl_device *hdev)
2263 {
2264 	if (hdev->asic_prop.fw_security_enabled)
2265 		return;
2266 
2267 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2268 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2269 		return;
2270 
2271 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2272 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2273 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2274 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2275 
2276 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2277 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2278 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2279 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2280 
2281 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2282 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2283 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2284 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2285 
2286 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2287 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2288 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2289 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2290 
2291 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2292 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2293 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2294 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2295 
2296 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2297 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2298 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2299 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2300 
2301 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2302 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2303 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2304 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2305 
2306 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2307 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2308 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2309 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2310 
2311 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2312 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2313 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2314 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2315 
2316 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2317 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2318 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2319 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2320 
2321 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2322 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2323 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2324 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2325 
2326 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2327 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2328 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2329 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2330 
2331 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2332 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2333 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2334 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2335 
2336 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2337 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2338 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2339 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2340 
2341 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2342 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2343 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2344 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2345 
2346 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2347 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2348 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2349 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2350 
2351 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2352 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2353 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2354 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2355 
2356 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2357 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2358 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2359 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2360 
2361 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2362 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2363 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2364 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2365 
2366 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2367 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2368 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2369 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2370 
2371 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2372 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2373 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2374 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2375 
2376 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2377 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2378 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2379 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2380 
2381 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2382 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2383 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2384 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2385 
2386 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2387 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2388 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2389 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2390 
2391 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2392 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2393 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2394 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2395 
2396 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2397 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2398 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2399 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2400 
2401 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2402 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2403 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2404 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2405 
2406 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2407 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2408 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2409 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2410 
2411 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2412 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2413 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2414 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2415 
2416 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2417 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2418 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2419 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2420 
2421 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2422 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2423 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2424 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2425 
2426 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2427 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2428 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2429 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2430 
2431 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2432 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2433 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2434 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2435 
2436 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2437 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2438 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2439 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2440 
2441 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2442 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2443 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2444 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2445 
2446 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2447 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2448 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2449 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2450 
2451 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2452 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2453 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2454 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2455 
2456 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2457 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2458 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2459 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2460 
2461 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2462 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2463 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2464 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2465 
2466 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2467 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2468 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2469 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2470 
2471 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2472 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2473 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2474 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2475 
2476 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2477 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2478 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2479 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2480 
2481 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2482 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2483 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2484 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2485 
2486 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2487 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2488 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2489 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2490 
2491 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2492 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2493 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2494 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2495 
2496 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2497 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2498 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2499 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2500 
2501 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2502 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2503 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2504 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2505 
2506 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2507 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2508 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2509 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2510 }
2511 
2512 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2513 {
2514 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2515 
2516 	if (hdev->asic_prop.fw_security_enabled)
2517 		return;
2518 
2519 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2520 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2521 		return;
2522 
2523 	hbm0_wr = 0x33333333;
2524 	hbm0_rd = 0x77777777;
2525 	hbm1_wr = 0x55555555;
2526 	hbm1_rd = 0xDDDDDDDD;
2527 
2528 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2529 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2530 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2531 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2532 
2533 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2534 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2535 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2536 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2537 
2538 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2539 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2540 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2541 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2542 
2543 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2544 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2545 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2546 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2547 
2548 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2549 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2550 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2551 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2552 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2553 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2554 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2555 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2556 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2557 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2558 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2559 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2560 
2561 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2562 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2563 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2564 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2565 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2566 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2567 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2568 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2569 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2570 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2571 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2572 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2573 }
2574 
2575 static void gaudi_init_golden_registers(struct hl_device *hdev)
2576 {
2577 	u32 tpc_offset;
2578 	int tpc_id, i;
2579 
2580 	gaudi_init_e2e(hdev);
2581 	gaudi_init_hbm_cred(hdev);
2582 
2583 	for (tpc_id = 0, tpc_offset = 0;
2584 				tpc_id < TPC_NUMBER_OF_ENGINES;
2585 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2586 		/* Mask all arithmetic interrupts from TPC */
2587 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2588 		/* Set 16 cache lines */
2589 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2590 				ICACHE_FETCH_LINE_NUM, 2);
2591 	}
2592 
2593 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2594 	for (i = 0 ; i < 128 ; i += 8)
2595 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2596 
2597 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2598 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2599 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2600 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2601 }
2602 
2603 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2604 					int qman_id, dma_addr_t qman_pq_addr)
2605 {
2606 	struct cpu_dyn_regs *dyn_regs =
2607 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2608 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2609 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2610 	u32 q_off, dma_qm_offset;
2611 	u32 dma_qm_err_cfg, irq_handler_offset;
2612 
2613 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2614 
2615 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2616 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2617 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2618 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2619 	so_base_en_lo = lower_32_bits(CFG_BASE +
2620 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2621 	so_base_en_hi = upper_32_bits(CFG_BASE +
2622 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2623 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2624 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2625 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2626 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2627 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2628 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2629 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2630 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2631 
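	/* Per-stream QMAN registers are laid out as arrays of 32-bit entries,
	 * so the registers of stream 'qman_id' sit at a 4-byte offset per
	 * stream from their stream-0 counterparts.
	 */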
2632 	q_off = dma_qm_offset + qman_id * 4;
2633 
2634 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2635 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2636 
2637 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2638 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2639 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2640 
2641 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2642 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2643 							QMAN_LDMA_SRC_OFFSET);
2644 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2645 							QMAN_LDMA_DST_OFFSET);
2646 
2647 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2648 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2649 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2650 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2651 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2652 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2653 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2654 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2655 
2656 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2657 
2658 	/* The following configuration is needed only once per QMAN */
2659 	if (qman_id == 0) {
2660 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2661 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2662 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2663 
2664 		/* Configure RAZWI IRQ */
2665 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2666 		if (hdev->stop_on_err)
2667 			dma_qm_err_cfg |=
2668 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2669 
2670 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2671 
2672 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2673 			lower_32_bits(CFG_BASE + irq_handler_offset));
2674 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2675 			upper_32_bits(CFG_BASE + irq_handler_offset));
2676 
2677 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2678 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2679 									dma_id);
2680 
2681 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2682 				QM_ARB_ERR_MSG_EN_MASK);
2683 
2684 		/* Set timeout to maximum */
2685 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2686 
2687 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2688 				QMAN_EXTERNAL_MAKE_TRUSTED);
2689 
2690 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2691 	}
2692 }
2693 
2694 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2695 {
2696 	struct cpu_dyn_regs *dyn_regs =
2697 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2698 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2699 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2700 	u32 irq_handler_offset;
2701 
2702 	/* Set to maximum possible according to physical size */
2703 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2704 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2705 
2706 	/* WA for H/W bug H3-2116 */
2707 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2708 
2709 	/* The STOP_ON bit implies no completion of the operation in case of RAZWI */
2710 	if (hdev->stop_on_err)
2711 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2712 
2713 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2714 
2715 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2716 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2717 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2718 
2719 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2720 		lower_32_bits(CFG_BASE + irq_handler_offset));
2721 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2722 		upper_32_bits(CFG_BASE + irq_handler_offset));
2723 
2724 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2725 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2726 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2727 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2728 	/* If the channel is secured, it should be in MMU bypass mode */
2729 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2730 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2731 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2732 }
2733 
2734 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2735 				u32 enable_mask)
2736 {
2737 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2738 
2739 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2740 }
2741 
2742 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2743 {
2744 	struct gaudi_device *gaudi = hdev->asic_specific;
2745 	struct hl_hw_queue *q;
2746 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2747 
2748 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2749 		return;
2750 
2751 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2752 		dma_id = gaudi_dma_assignment[i];
2753 		/*
2754 		 * For queues after the CPU queue, we need to add 1 to get the
2755 		 * correct queue index. In addition, we need to account for the CPU
2756 		 * EQ and NIC IRQs in order to get the correct MSI vector.
2757 		 */
2758 		if (dma_id > 1) {
2759 			cpu_skip = 1;
2760 			nic_skip = NIC_NUMBER_OF_ENGINES;
2761 		} else {
2762 			cpu_skip = 0;
2763 			nic_skip = 0;
2764 		}
2765 
2766 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2767 			q_idx = 4 * dma_id + j + cpu_skip;
2768 			q = &hdev->kernel_queues[q_idx];
2769 			q->cq_id = cq_id++;
2770 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2771 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2772 						q->bus_address);
2773 		}
2774 
2775 		gaudi_init_dma_core(hdev, dma_id);
2776 
2777 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2778 	}
2779 
2780 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2781 }
2782 
2783 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2784 					int qman_id, u64 qman_base_addr)
2785 {
2786 	struct cpu_dyn_regs *dyn_regs =
2787 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2788 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2789 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2790 	u32 dma_qm_err_cfg, irq_handler_offset;
2791 	u32 q_off, dma_qm_offset;
2792 
2793 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2794 
2795 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2796 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2797 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2798 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2799 	so_base_en_lo = lower_32_bits(CFG_BASE +
2800 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2801 	so_base_en_hi = upper_32_bits(CFG_BASE +
2802 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2803 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2804 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2805 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2806 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2807 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2808 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2809 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2810 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2811 
2812 	q_off = dma_qm_offset + qman_id * 4;
2813 
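	/* Stream CPs (qman_id 0-3) are given a PQ in host memory; qman_id 4 is
	 * the lower CP, which has no PQ and instead receives the RAZWI/error
	 * configuration that is done once per QMAN.
	 */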
2814 	if (qman_id < 4) {
2815 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2816 					lower_32_bits(qman_base_addr));
2817 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2818 					upper_32_bits(qman_base_addr));
2819 
2820 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2821 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2822 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2823 
2824 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2825 							QMAN_CPDMA_SIZE_OFFSET);
2826 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2827 							QMAN_CPDMA_SRC_OFFSET);
2828 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2829 							QMAN_CPDMA_DST_OFFSET);
2830 	} else {
2831 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2832 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2833 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2834 
2835 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2836 							QMAN_LDMA_SIZE_OFFSET);
2837 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2838 							QMAN_LDMA_SRC_OFFSET);
2839 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2840 							QMAN_LDMA_DST_OFFSET);
2841 
2842 		/* Configure RAZWI IRQ */
2843 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2844 		if (hdev->stop_on_err)
2845 			dma_qm_err_cfg |=
2846 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2847 
2848 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2849 
2850 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2851 			lower_32_bits(CFG_BASE + irq_handler_offset));
2852 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2853 			upper_32_bits(CFG_BASE + irq_handler_offset));
2854 
2855 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2856 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2857 									dma_id);
2858 
2859 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2860 				QM_ARB_ERR_MSG_EN_MASK);
2861 
2862 		/* Set timeout to maximum */
2863 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2864 
2865 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2866 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2867 				QMAN_INTERNAL_MAKE_TRUSTED);
2868 	}
2869 
2870 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2871 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2872 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2873 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2874 
2875 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2876 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2877 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2878 				mtr_base_ws_lo);
2879 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2880 				mtr_base_ws_hi);
2881 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2882 				so_base_ws_lo);
2883 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2884 				so_base_ws_hi);
2885 	}
2886 }
2887 
2888 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2889 {
2890 	struct gaudi_device *gaudi = hdev->asic_specific;
2891 	struct gaudi_internal_qman_info *q;
2892 	u64 qman_base_addr;
2893 	int i, j, dma_id, internal_q_index;
2894 
2895 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2896 		return;
2897 
2898 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2899 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2900 
2901 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2902 			 /*
2903 			  * Account for the CPU queue in order to get the correct
2904 			  * queue number, as all internal queues are placed after it
2905 			  */
2906 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2907 
2908 			q = &gaudi->internal_qmans[internal_q_index];
2909 			qman_base_addr = (u64) q->pq_dma_addr;
2910 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2911 						qman_base_addr);
2912 		}
2913 
2914 		/* Initializing lower CP for HBM DMA QMAN */
2915 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2916 
2917 		gaudi_init_dma_core(hdev, dma_id);
2918 
2919 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2920 	}
2921 
2922 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2923 }
2924 
2925 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2926 					int qman_id, u64 qman_base_addr)
2927 {
2928 	struct cpu_dyn_regs *dyn_regs =
2929 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2930 	u32 mtr_base_lo, mtr_base_hi;
2931 	u32 so_base_lo, so_base_hi;
2932 	u32 irq_handler_offset;
2933 	u32 q_off, mme_id;
2934 	u32 mme_qm_err_cfg;
2935 
2936 	mtr_base_lo = lower_32_bits(CFG_BASE +
2937 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2938 	mtr_base_hi = upper_32_bits(CFG_BASE +
2939 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2940 	so_base_lo = lower_32_bits(CFG_BASE +
2941 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2942 	so_base_hi = upper_32_bits(CFG_BASE +
2943 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2944 
2945 	q_off = mme_offset + qman_id * 4;
2946 
2947 	if (qman_id < 4) {
2948 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2949 					lower_32_bits(qman_base_addr));
2950 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2951 					upper_32_bits(qman_base_addr));
2952 
2953 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2954 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2955 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2956 
2957 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2958 							QMAN_CPDMA_SIZE_OFFSET);
2959 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2960 							QMAN_CPDMA_SRC_OFFSET);
2961 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2962 							QMAN_CPDMA_DST_OFFSET);
2963 	} else {
2964 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2965 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2966 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2967 
2968 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2969 							QMAN_LDMA_SIZE_OFFSET);
2970 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2971 							QMAN_LDMA_SRC_OFFSET);
2972 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2973 							QMAN_LDMA_DST_OFFSET);
2974 
2975 		/* Configure RAZWI IRQ */
2976 		mme_id = mme_offset /
2977 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2978 
2979 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2980 		if (hdev->stop_on_err)
2981 			mme_qm_err_cfg |=
2982 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2983 
2984 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2985 
2986 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2987 			lower_32_bits(CFG_BASE + irq_handler_offset));
2988 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2989 			upper_32_bits(CFG_BASE + irq_handler_offset));
2990 
2991 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2992 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2993 									mme_id);
2994 
2995 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2996 				QM_ARB_ERR_MSG_EN_MASK);
2997 
2998 		/* Set timeout to maximum */
2999 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
3000 
3001 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3002 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3003 				QMAN_INTERNAL_MAKE_TRUSTED);
3004 	}
3005 
3006 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3007 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3008 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3009 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3010 }
3011 
3012 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3013 {
3014 	struct gaudi_device *gaudi = hdev->asic_specific;
3015 	struct gaudi_internal_qman_info *q;
3016 	u64 qman_base_addr;
3017 	u32 mme_offset;
3018 	int i, internal_q_index;
3019 
3020 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
3021 		return;
3022 
3023 	/*
3024 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3025 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3026 	 */
3027 
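	/* Concretely, the first four streams (MME_0_X) are programmed at the
	 * MME2 block offset; once i == 3 the offset is reset to 0 so the next
	 * four streams (MME_1_X) are programmed at the MME0 block.
	 */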
3028 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3029 
3030 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3031 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3032 		q = &gaudi->internal_qmans[internal_q_index];
3033 		qman_base_addr = (u64) q->pq_dma_addr;
3034 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3035 					qman_base_addr);
3036 		if (i == 3)
3037 			mme_offset = 0;
3038 	}
3039 
3040 	/* Initializing lower CP for MME QMANs */
3041 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3042 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3043 	gaudi_init_mme_qman(hdev, 0, 4, 0);
3044 
3045 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3046 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3047 
3048 	gaudi->hw_cap_initialized |= HW_CAP_MME;
3049 }
3050 
3051 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3052 				int qman_id, u64 qman_base_addr)
3053 {
3054 	struct cpu_dyn_regs *dyn_regs =
3055 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3056 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3057 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3058 	u32 tpc_qm_err_cfg, irq_handler_offset;
3059 	u32 q_off, tpc_id;
3060 
3061 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3062 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3063 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3064 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3065 	so_base_en_lo = lower_32_bits(CFG_BASE +
3066 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3067 	so_base_en_hi = upper_32_bits(CFG_BASE +
3068 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3069 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3070 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3071 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3072 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3073 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3074 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3075 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3076 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3077 
3078 	q_off = tpc_offset + qman_id * 4;
3079 
3080 	tpc_id = tpc_offset /
3081 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3082 
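	/*
	 * qman_id 0-3 are the upper CPs, each with its own PQ in host memory.
	 * qman_id 4 is the lower CP, which has no PQ and receives the error,
	 * arbiter and protection configuration instead.
	 */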
3083 	if (qman_id < 4) {
3084 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3085 					lower_32_bits(qman_base_addr));
3086 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3087 					upper_32_bits(qman_base_addr));
3088 
3089 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3090 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3091 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3092 
3093 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3094 							QMAN_CPDMA_SIZE_OFFSET);
3095 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3096 							QMAN_CPDMA_SRC_OFFSET);
3097 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3098 							QMAN_CPDMA_DST_OFFSET);
3099 	} else {
3100 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3101 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3102 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3103 
3104 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3105 							QMAN_LDMA_SIZE_OFFSET);
3106 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3107 							QMAN_LDMA_SRC_OFFSET);
3108 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3109 							QMAN_LDMA_DST_OFFSET);
3110 
3111 		/* Configure RAZWI IRQ */
3112 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3113 		if (hdev->stop_on_err)
3114 			tpc_qm_err_cfg |=
3115 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3116 
3117 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3118 
3119 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3120 			lower_32_bits(CFG_BASE + irq_handler_offset));
3121 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3122 			upper_32_bits(CFG_BASE + irq_handler_offset));
3123 
3124 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3125 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3126 									tpc_id);
3127 
3128 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3129 				QM_ARB_ERR_MSG_EN_MASK);
3130 
3131 		/* Set timeout to maximum */
3132 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3133 
3134 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3135 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3136 				QMAN_INTERNAL_MAKE_TRUSTED);
3137 	}
3138 
3139 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3140 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3141 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3142 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3143 
3144 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3145 	if (tpc_id == 6) {
3146 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3147 				mtr_base_ws_lo);
3148 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3149 				mtr_base_ws_hi);
3150 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3151 				so_base_ws_lo);
3152 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3153 				so_base_ws_hi);
3154 	}
3155 }
3156 
3157 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3158 {
3159 	struct gaudi_device *gaudi = hdev->asic_specific;
3160 	struct gaudi_internal_qman_info *q;
3161 	u64 qman_base_addr;
3162 	u32 so_base_hi, tpc_offset = 0;
3163 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3164 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3165 	int i, tpc_id, internal_q_index;
3166 
3167 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3168 		return;
3169 
3170 	so_base_hi = upper_32_bits(CFG_BASE +
3171 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3172 
3173 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3174 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3175 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3176 						tpc_id * QMAN_STREAMS + i;
3177 			q = &gaudi->internal_qmans[internal_q_index];
3178 			qman_base_addr = (u64) q->pq_dma_addr;
3179 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3180 						qman_base_addr);
3181 
3182 			if (i == 3) {
3183 				/* Initializing lower CP for TPC QMAN */
3184 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3185 
3186 				/* Enable the QMAN and TPC channel */
3187 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3188 						QMAN_TPC_ENABLE);
3189 			}
3190 		}
3191 
3192 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3193 				so_base_hi);
3194 
3195 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3196 
3197 		gaudi->hw_cap_initialized |=
3198 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3199 	}
3200 }
3201 
3202 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3203 				int qman_id, u64 qman_base_addr, int nic_id)
3204 {
3205 	struct cpu_dyn_regs *dyn_regs =
3206 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3207 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3208 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3209 	u32 nic_qm_err_cfg, irq_handler_offset;
3210 	u32 q_off;
3211 
3212 	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3213 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3214 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3215 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3216 	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3217 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3218 	so_base_en_hi = upper_32_bits(CFG_BASE +
3219 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3220 	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3221 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3222 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3223 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3224 	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3225 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3226 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3227 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3228 
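	/*
	 * The _en/_ws bases point at the east-north and west-south sync
	 * managers; MSG_BASE 0/1 use the former and the collective
	 * MSG_BASE 2/3 use the latter.
	 */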
3229 	q_off = nic_offset + qman_id * 4;
3230 
3231 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3232 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3233 
3234 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3235 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3236 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3237 
3238 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3239 							QMAN_LDMA_SIZE_OFFSET);
3240 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3241 							QMAN_LDMA_SRC_OFFSET);
3242 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3243 							QMAN_LDMA_DST_OFFSET);
3244 
3245 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3246 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3247 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3248 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3249 
3250 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3251 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3252 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3253 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3254 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3255 
3256 	if (qman_id == 0) {
3257 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3258 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3259 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3260 
3261 		/* Configure RAZWI IRQ */
3262 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3263 		if (hdev->stop_on_err)
3264 			nic_qm_err_cfg |=
3265 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3266 
3267 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3268 
3269 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3270 			lower_32_bits(CFG_BASE + irq_handler_offset));
3271 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3272 			upper_32_bits(CFG_BASE + irq_handler_offset));
3273 
3274 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3275 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3276 									nic_id);
3277 
3278 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3279 				QM_ARB_ERR_MSG_EN_MASK);
3280 
3281 		/* Set timeout to maximum */
3282 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3283 
3284 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3285 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3286 				QMAN_INTERNAL_MAKE_TRUSTED);
3287 	}
3288 }
3289 
3290 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3291 {
3292 	struct gaudi_device *gaudi = hdev->asic_specific;
3293 	struct gaudi_internal_qman_info *q;
3294 	u64 qman_base_addr;
3295 	u32 nic_offset = 0;
3296 	u32 nic_delta_between_qmans =
3297 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3298 	u32 nic_delta_between_nics =
3299 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3300 	int i, nic_id, internal_q_index;
3301 
3302 	if (!hdev->nic_ports_mask)
3303 		return;
3304 
3305 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3306 		return;
3307 
3308 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3309 
3310 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
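		/*
		 * Each NIC macro hosts two QMANs. When a port is masked off,
		 * still advance the register offset: step to the next QMAN,
		 * and after the second QMAN of a macro rewind both steps and
		 * jump to the next NIC macro.
		 */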
3311 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3312 			nic_offset += nic_delta_between_qmans;
3313 			if (nic_id & 1) {
3314 				nic_offset -= (nic_delta_between_qmans * 2);
3315 				nic_offset += nic_delta_between_nics;
3316 			}
3317 			continue;
3318 		}
3319 
3320 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3321 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3322 						nic_id * QMAN_STREAMS + i;
3323 			q = &gaudi->internal_qmans[internal_q_index];
3324 			qman_base_addr = (u64) q->pq_dma_addr;
3325 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3326 						qman_base_addr, nic_id);
3327 		}
3328 
3329 		/* Enable the QMAN */
3330 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3331 
3332 		nic_offset += nic_delta_between_qmans;
3333 		if (nic_id & 1) {
3334 			nic_offset -= (nic_delta_between_qmans * 2);
3335 			nic_offset += nic_delta_between_nics;
3336 		}
3337 
3338 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3339 	}
3340 }
3341 
3342 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3343 {
3344 	struct gaudi_device *gaudi = hdev->asic_specific;
3345 
3346 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3347 		return;
3348 
3349 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3350 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3351 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3352 }
3353 
3354 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3355 {
3356 	struct gaudi_device *gaudi = hdev->asic_specific;
3357 
3358 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3359 		return;
3360 
3361 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3362 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3363 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3364 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3365 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3366 }
3367 
3368 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3369 {
3370 	struct gaudi_device *gaudi = hdev->asic_specific;
3371 
3372 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3373 		return;
3374 
3375 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3376 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3377 }
3378 
3379 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3380 {
3381 	struct gaudi_device *gaudi = hdev->asic_specific;
3382 	u32 tpc_offset = 0;
3383 	int tpc_id;
3384 
3385 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3386 		return;
3387 
3388 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3389 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3390 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3391 	}
3392 }
3393 
3394 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3395 {
3396 	struct gaudi_device *gaudi = hdev->asic_specific;
3397 	u32 nic_mask, nic_offset = 0;
3398 	u32 nic_delta_between_qmans =
3399 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3400 	u32 nic_delta_between_nics =
3401 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3402 	int nic_id;
3403 
3404 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3405 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3406 
3407 		if (gaudi->hw_cap_initialized & nic_mask)
3408 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3409 
3410 		nic_offset += nic_delta_between_qmans;
3411 		if (nic_id & 1) {
3412 			nic_offset -= (nic_delta_between_qmans * 2);
3413 			nic_offset += nic_delta_between_nics;
3414 		}
3415 	}
3416 }
3417 
3418 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3419 {
3420 	struct gaudi_device *gaudi = hdev->asic_specific;
3421 
3422 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3423 		return;
3424 
3425 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3426 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3427 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3428 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3429 }
3430 
3431 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3432 {
3433 	struct gaudi_device *gaudi = hdev->asic_specific;
3434 
3435 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3436 		return;
3437 
3438 	/* Stop CPs of HBM DMA QMANs */
3439 
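	/* 0x1F stops the four upper CPs and the lower CP of each QMAN */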
3440 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3441 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3442 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3443 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3444 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3445 }
3446 
3447 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3448 {
3449 	struct gaudi_device *gaudi = hdev->asic_specific;
3450 
3451 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3452 		return;
3453 
3454 	/* Stop CPs of MME QMANs */
3455 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3456 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3457 }
3458 
3459 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3460 {
3461 	struct gaudi_device *gaudi = hdev->asic_specific;
3462 
3463 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3464 		return;
3465 
3466 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3467 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3468 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3469 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3470 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3471 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3472 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3473 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3474 }
3475 
3476 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3477 {
3478 	struct gaudi_device *gaudi = hdev->asic_specific;
3479 
3480 	/* Stop upper CPs of QMANs */
3481 
3482 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3483 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3484 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3485 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3486 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3487 
3488 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3489 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3490 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3491 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3492 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3493 
3494 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3495 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3496 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3497 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3498 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3499 
3500 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3501 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3502 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3503 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3504 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3505 
3506 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3507 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3508 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3509 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3510 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3511 
3512 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3513 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3514 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3515 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3516 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3517 
3518 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3519 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3520 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3521 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3522 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3523 
3524 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3525 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3526 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3527 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3528 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3529 
3530 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3531 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3532 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3533 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3534 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3535 
3536 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3537 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3538 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3539 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3540 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3541 }
3542 
3543 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3544 {
3545 	struct gaudi_device *gaudi = hdev->asic_specific;
3546 
3547 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3548 		return;
3549 
3550 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3551 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3552 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3553 }
3554 
3555 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3556 {
3557 	struct gaudi_device *gaudi = hdev->asic_specific;
3558 
3559 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3560 		return;
3561 
3562 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3563 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3564 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3565 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3566 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3567 }
3568 
3569 static void gaudi_mme_stall(struct hl_device *hdev)
3570 {
3571 	struct gaudi_device *gaudi = hdev->asic_specific;
3572 
3573 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3574 		return;
3575 
3576 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3577 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3578 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3579 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3580 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3581 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3582 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3583 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3584 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3585 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3586 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3587 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3588 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3589 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3590 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3591 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3592 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3593 }
3594 
3595 static void gaudi_tpc_stall(struct hl_device *hdev)
3596 {
3597 	struct gaudi_device *gaudi = hdev->asic_specific;
3598 
3599 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3600 		return;
3601 
3602 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3603 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3604 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3605 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3606 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3607 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3608 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3609 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3610 }
3611 
3612 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3613 {
3614 	u32 qman_offset;
3615 	int i;
3616 
3617 	if (hdev->asic_prop.fw_security_enabled)
3618 		return;
3619 
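	/* Clearing CGM_CFG/CGM_CFG1 turns off the clock gating manager of each QMAN */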
3620 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3621 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3622 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3623 
3624 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3625 	}
3626 
3627 	WREG32(mmMME0_QM_CGM_CFG, 0);
3628 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3629 	WREG32(mmMME2_QM_CGM_CFG, 0);
3630 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3631 
3632 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3633 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3634 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3635 
3636 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3637 	}
3638 }
3639 
3640 static void gaudi_enable_timestamp(struct hl_device *hdev)
3641 {
3642 	/* Disable the timestamp counter */
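	/*
	 * mmPSOC_TIMESTAMP_BASE is an absolute address (it already includes
	 * CFG_BASE), hence the subtraction to get the offset WREG32 expects.
	 */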
3643 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3644 
3645 	/* Zero the lower/upper parts of the 64-bit counter */
3646 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3647 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3648 
3649 	/* Enable the counter */
3650 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3651 }
3652 
3653 static void gaudi_disable_timestamp(struct hl_device *hdev)
3654 {
3655 	/* Disable the timestamp counter */
3656 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3657 }
3658 
3659 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3660 {
3661 	u32 wait_timeout_ms;
3662 
3663 	if (hdev->pldm)
3664 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3665 	else
3666 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3667 
3668 	if (fw_reset)
3669 		goto skip_engines;
3670 
3671 	gaudi_stop_nic_qmans(hdev);
3672 	gaudi_stop_mme_qmans(hdev);
3673 	gaudi_stop_tpc_qmans(hdev);
3674 	gaudi_stop_hbm_dma_qmans(hdev);
3675 	gaudi_stop_pci_dma_qmans(hdev);
3676 
3677 	msleep(wait_timeout_ms);
3678 
3679 	gaudi_pci_dma_stall(hdev);
3680 	gaudi_hbm_dma_stall(hdev);
3681 	gaudi_tpc_stall(hdev);
3682 	gaudi_mme_stall(hdev);
3683 
3684 	msleep(wait_timeout_ms);
3685 
3686 	gaudi_disable_nic_qmans(hdev);
3687 	gaudi_disable_mme_qmans(hdev);
3688 	gaudi_disable_tpc_qmans(hdev);
3689 	gaudi_disable_hbm_dma_qmans(hdev);
3690 	gaudi_disable_pci_dma_qmans(hdev);
3691 
3692 	gaudi_disable_timestamp(hdev);
3693 
3694 skip_engines:
3695 	gaudi_disable_msi(hdev);
3696 }
3697 
3698 static int gaudi_mmu_init(struct hl_device *hdev)
3699 {
3700 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3701 	struct gaudi_device *gaudi = hdev->asic_specific;
3702 	u64 hop0_addr;
3703 	int rc, i;
3704 
3705 	if (!hdev->mmu_enable)
3706 		return 0;
3707 
3708 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3709 		return 0;
3710 
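	/*
	 * The hop0 page tables of all ASIDs are laid out consecutively in the
	 * MMU page-table area, one hop table per ASID.
	 */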
3711 	for (i = 0 ; i < prop->max_asid ; i++) {
3712 		hop0_addr = prop->mmu_pgt_addr +
3713 				(i * prop->mmu_hop_table_size);
3714 
3715 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3716 		if (rc) {
3717 			dev_err(hdev->dev,
3718 				"failed to set hop0 addr for asid %d\n", i);
3719 			goto err;
3720 		}
3721 	}
3722 
3723 	/* Init the MMU cache management page */
3724 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3725 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3726 
3727 	/* mem cache invalidation */
3728 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3729 
3730 	hl_mmu_invalidate_cache(hdev, true, 0);
3731 
3732 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3733 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3734 
3735 	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3736 
3737 	/*
3738 	 * The H/W expects the first PI after init to be 1. After wraparound
3739 	 * we'll write 0.
3740 	 */
3741 	gaudi->mmu_cache_inv_pi = 1;
3742 
3743 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3744 
3745 	return 0;
3746 
3747 err:
3748 	return rc;
3749 }
3750 
3751 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3752 {
3753 	void __iomem *dst;
3754 
3755 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3756 
3757 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3758 }
3759 
3760 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3761 {
3762 	void __iomem *dst;
3763 
3764 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3765 
3766 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3767 }
3768 
3769 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3770 {
3771 	struct dynamic_fw_load_mgr *dynamic_loader;
3772 	struct cpu_dyn_regs *dyn_regs;
3773 
3774 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3775 
3776 	/*
3777 	 * Here we set initial values for a few specific dynamic regs. Before
3778 	 * the first descriptor is read from the FW, these values have to be
3779 	 * hard-coded. In later stages of the protocol they are updated
3780 	 * automatically by reading the FW descriptor, so the data there is
3781 	 * always up-to-date.
3782 	 */
3783 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3784 	dyn_regs->kmd_msg_to_cpu =
3785 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3786 	dyn_regs->cpu_cmd_status_to_host =
3787 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3788 
3789 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3790 }
3791 
3792 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3793 {
3794 	struct static_fw_load_mgr *static_loader;
3795 
3796 	static_loader = &hdev->fw_loader.static_loader;
3797 
3798 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3799 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3800 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3801 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3802 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3803 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3804 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3805 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3806 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3807 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3808 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3809 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3810 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3811 			GAUDI_PLDM_RESET_WAIT_MSEC :
3812 			GAUDI_CPU_RESET_WAIT_MSEC;
3813 }
3814 
3815 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3816 {
3817 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3818 
3819 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3820 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3821 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3822 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3823 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3824 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3825 }
3826 
3827 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3828 {
3829 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3830 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3831 
3832 	/* fill common fields */
3833 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3834 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3835 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3836 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3837 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3838 	fw_loader->skip_bmc = !hdev->bmc_enable;
3839 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3840 	fw_loader->dram_bar_id = HBM_BAR_ID;
3841 
3842 	if (prop->dynamic_fw_load)
3843 		gaudi_init_dynamic_firmware_loader(hdev);
3844 	else
3845 		gaudi_init_static_firmware_loader(hdev);
3846 }
3847 
3848 static int gaudi_init_cpu(struct hl_device *hdev)
3849 {
3850 	struct gaudi_device *gaudi = hdev->asic_specific;
3851 	int rc;
3852 
3853 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3854 		return 0;
3855 
3856 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3857 		return 0;
3858 
3859 	/*
3860 	 * The device CPU works with 40-bit addresses.
3861 	 * This register sets the extension to 50 bits.
3862 	 */
3863 	if (!hdev->asic_prop.fw_security_enabled)
3864 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3865 
3866 	rc = hl_fw_init_cpu(hdev);
3867 
3868 	if (rc)
3869 		return rc;
3870 
3871 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3872 
3873 	return 0;
3874 }
3875 
3876 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3877 {
3878 	struct cpu_dyn_regs *dyn_regs =
3879 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3880 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3881 	struct gaudi_device *gaudi = hdev->asic_specific;
3882 	u32 status, irq_handler_offset;
3883 	struct hl_eq *eq;
3884 	struct hl_hw_queue *cpu_pq =
3885 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3886 	int err;
3887 
3888 	if (!hdev->cpu_queues_enable)
3889 		return 0;
3890 
3891 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3892 		return 0;
3893 
3894 	eq = &hdev->event_queue;
3895 
3896 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3897 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3898 
3899 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3900 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3901 
3902 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3903 			lower_32_bits(hdev->cpu_accessible_dma_address));
3904 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3905 			upper_32_bits(hdev->cpu_accessible_dma_address));
3906 
3907 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3908 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3909 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3910 
3911 	/* Used for EQ CI */
3912 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3913 
3914 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3915 
3916 	if (gaudi->multi_msi_mode)
3917 		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3918 	else
3919 		WREG32(mmCPU_IF_QUEUE_INIT,
3920 			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3921 
3922 	irq_handler_offset = prop->gic_interrupts_enable ?
3923 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3924 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3925 
3926 	WREG32(irq_handler_offset,
3927 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3928 
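	/* Wait for the device CPU to acknowledge that the queues are configured */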
3929 	err = hl_poll_timeout(
3930 		hdev,
3931 		mmCPU_IF_QUEUE_INIT,
3932 		status,
3933 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3934 		1000,
3935 		cpu_timeout);
3936 
3937 	if (err) {
3938 		dev_err(hdev->dev,
3939 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3940 		return -EIO;
3941 	}
3942 
3943 	/* update FW application security bits */
3944 	if (prop->fw_cpu_boot_dev_sts0_valid)
3945 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3946 	if (prop->fw_cpu_boot_dev_sts1_valid)
3947 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3948 
3949 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3950 	return 0;
3951 }
3952 
3953 static void gaudi_pre_hw_init(struct hl_device *hdev)
3954 {
3955 	/* Perform read from the device to make sure device is up */
3956 	RREG32(mmHW_STATE);
3957 
3958 	if (!hdev->asic_prop.fw_security_enabled) {
3959 		/* Set the access through PCI bars (Linux driver only) as
3960 		 * secured
3961 		 */
3962 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3963 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3964 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3965 
3966 		/* Perform read to flush the waiting writes to ensure
3967 		 * configuration was set in the device
3968 		 */
3969 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3970 	}
3971 
3972 	/*
3973 	 * Let's mark in the H/W that we have reached this point. We check
3974 	 * this value in the reset_before_init function to understand whether
3975 	 * we need to reset the chip before doing H/W init. This register is
3976 	 * cleared by the H/W upon H/W reset
3977 	 */
3978 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3979 }
3980 
3981 static int gaudi_hw_init(struct hl_device *hdev)
3982 {
3983 	struct gaudi_device *gaudi = hdev->asic_specific;
3984 	int rc;
3985 
3986 	gaudi_pre_hw_init(hdev);
3987 
3988 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3989 	 * So we set it here and if anyone tries to move it later to
3990 	 * a different address, there will be an error
3991 	 */
3992 	if (hdev->asic_prop.iatu_done_by_fw)
3993 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3994 
3995 	/*
3996 	 * Before pushing u-boot/Linux to the device, the HBM BAR must be set
3997 	 * to the DRAM base address
3998 	 */
3999 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4000 		dev_err(hdev->dev,
4001 			"failed to map HBM bar to DRAM base address\n");
4002 		return -EIO;
4003 	}
4004 
4005 	rc = gaudi_init_cpu(hdev);
4006 	if (rc) {
4007 		dev_err(hdev->dev, "failed to initialize CPU\n");
4008 		return rc;
4009 	}
4010 
4011 	/* In case the clock gating was enabled in preboot we need to disable
4012 	 * it here before touching the MME/TPC registers.
4013 	 */
4014 	gaudi_disable_clock_gating(hdev);
4015 
4016 	/* SRAM scrambler must be initialized after CPU is running from HBM */
4017 	gaudi_init_scrambler_sram(hdev);
4018 
4019 	/* This is here just in case we are working without CPU */
4020 	gaudi_init_scrambler_hbm(hdev);
4021 
4022 	gaudi_init_golden_registers(hdev);
4023 
4024 	rc = gaudi_mmu_init(hdev);
4025 	if (rc)
4026 		return rc;
4027 
4028 	gaudi_init_security(hdev);
4029 
4030 	gaudi_init_pci_dma_qmans(hdev);
4031 
4032 	gaudi_init_hbm_dma_qmans(hdev);
4033 
4034 	gaudi_init_mme_qmans(hdev);
4035 
4036 	gaudi_init_tpc_qmans(hdev);
4037 
4038 	gaudi_init_nic_qmans(hdev);
4039 
4040 	gaudi_enable_timestamp(hdev);
4041 
4042 	/* MSI must be enabled before CPU queues and NIC are initialized */
4043 	rc = gaudi_enable_msi(hdev);
4044 	if (rc)
4045 		goto disable_queues;
4046 
4047 	/* must be called after MSI was enabled */
4048 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4049 	if (rc) {
4050 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4051 			rc);
4052 		goto disable_msi;
4053 	}
4054 
4055 	/* Perform read from the device to flush all configuration */
4056 	RREG32(mmHW_STATE);
4057 
4058 	return 0;
4059 
4060 disable_msi:
4061 	gaudi_disable_msi(hdev);
4062 disable_queues:
4063 	gaudi_disable_mme_qmans(hdev);
4064 	gaudi_disable_pci_dma_qmans(hdev);
4065 
4066 	return rc;
4067 }
4068 
4069 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4070 {
4071 	struct cpu_dyn_regs *dyn_regs =
4072 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4073 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4074 	struct gaudi_device *gaudi = hdev->asic_specific;
4075 	bool driver_performs_reset;
4076 
4077 	if (!hard_reset) {
4078 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4079 		return;
4080 	}
4081 
4082 	if (hdev->pldm) {
4083 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4084 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4085 	} else {
4086 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4087 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4088 	}
4089 
4090 	if (fw_reset) {
4091 		dev_dbg(hdev->dev,
4092 			"Firmware performs HARD reset, going to wait %dms\n",
4093 			reset_timeout_ms);
4094 
4095 		goto skip_reset;
4096 	}
4097 
4098 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4099 					!hdev->asic_prop.hard_reset_done_by_fw);
4100 
4101 	/* Set device to handle FLR by H/W as we will put the device CPU to
4102 	 * halt mode
4103 	 */
4104 	if (driver_performs_reset)
4105 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4106 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4107 
4108 	/* If linux is loaded in the device CPU we need to communicate with it
4109 	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4110 	 * registers in case of old F/Ws
4111 	 */
4112 	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4113 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4114 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4115 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4116 
4117 		WREG32(irq_handler_offset,
4118 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4119 
4120 		/* This is a hail-mary attempt to revive the card in the small chance that the
4121 		 * f/w has experienced a watchdog event, which caused it to return to preboot.
4122 		 * In that case, triggering reset through GIC won't help. We need to trigger the
4123 		 * reset as if Linux wasn't loaded.
4124 		 *
4125 		 * We do it only if the reset cause was HB, because that would be the indication
4126 		 * of such an event.
4127 		 *
4128 		 * In case watchdog hasn't expired but we still got HB, then this won't do any
4129 		 * damage.
4130 		 */
4131 		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4132 			if (hdev->asic_prop.hard_reset_done_by_fw)
4133 				hl_fw_ask_hard_reset_without_linux(hdev);
4134 			else
4135 				hl_fw_ask_halt_machine_without_linux(hdev);
4136 		}
4137 	} else {
4138 		if (hdev->asic_prop.hard_reset_done_by_fw)
4139 			hl_fw_ask_hard_reset_without_linux(hdev);
4140 		else
4141 			hl_fw_ask_halt_machine_without_linux(hdev);
4142 	}
4143 
4144 	if (driver_performs_reset) {
4145 
4146 		/* Configure the reset registers. Must be done as early as
4147 		 * possible in case we fail during H/W initialization
4148 		 */
4149 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4150 						(CFG_RST_H_DMA_MASK |
4151 						CFG_RST_H_MME_MASK |
4152 						CFG_RST_H_SM_MASK |
4153 						CFG_RST_H_TPC_7_MASK));
4154 
4155 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4156 
4157 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4158 						(CFG_RST_H_HBM_MASK |
4159 						CFG_RST_H_TPC_7_MASK |
4160 						CFG_RST_H_NIC_MASK |
4161 						CFG_RST_H_SM_MASK |
4162 						CFG_RST_H_DMA_MASK |
4163 						CFG_RST_H_MME_MASK |
4164 						CFG_RST_H_CPU_MASK |
4165 						CFG_RST_H_MMU_MASK));
4166 
4167 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4168 						(CFG_RST_L_IF_MASK |
4169 						CFG_RST_L_PSOC_MASK |
4170 						CFG_RST_L_TPC_MASK));
4171 
4172 		msleep(cpu_timeout_ms);
4173 
4174 		/* Tell ASIC not to re-initialize PCIe */
4175 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4176 
4177 		/* Restart BTL/BLR upon hard-reset */
4178 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4179 
4180 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4181 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4182 
4183 		dev_dbg(hdev->dev,
4184 			"Issued HARD reset command, going to wait %dms\n",
4185 			reset_timeout_ms);
4186 	} else {
4187 		dev_dbg(hdev->dev,
4188 			"Firmware performs HARD reset, going to wait %dms\n",
4189 			reset_timeout_ms);
4190 	}
4191 
4192 skip_reset:
4193 	/*
4194 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4195 	 * itself is in reset. Need to wait until the reset is deasserted
4196 	 */
4197 	msleep(reset_timeout_ms);
4198 
4199 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4200 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4201 		dev_err(hdev->dev,
4202 			"Timeout while waiting for device to reset 0x%x\n",
4203 			status);
4204 
4205 	if (gaudi) {
4206 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4207 						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4208 						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4209 						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4210 						HW_CAP_HBM_SCRAMBLER);
4211 
4212 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4213 
4214 		hdev->device_cpu_is_halted = false;
4215 	}
4216 }
4217 
4218 static int gaudi_suspend(struct hl_device *hdev)
4219 {
4220 	int rc;
4221 
4222 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4223 	if (rc)
4224 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4225 
4226 	return rc;
4227 }
4228 
4229 static int gaudi_resume(struct hl_device *hdev)
4230 {
4231 	return gaudi_init_iatu(hdev);
4232 }
4233 
4234 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4235 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4236 {
4237 	int rc;
4238 
4239 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4240 			VM_DONTCOPY | VM_NORESERVE;
4241 
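	/*
	 * Host DMA addresses are stored shifted by HOST_PHYS_BASE (the
	 * device's view of host memory); strip it before handing the address
	 * back to the DMA API.
	 */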
4242 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4243 				(dma_addr - HOST_PHYS_BASE), size);
4244 	if (rc)
4245 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4246 
4247 	return rc;
4248 }
4249 
4250 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4251 {
4252 	struct cpu_dyn_regs *dyn_regs =
4253 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4254 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4255 	struct gaudi_device *gaudi = hdev->asic_specific;
4256 	bool invalid_queue = false;
4257 	int dma_id;
4258 
4259 	switch (hw_queue_id) {
4260 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4261 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4262 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4263 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4264 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4265 		break;
4266 
4267 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4268 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4269 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4270 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4271 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4272 		break;
4273 
4274 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4275 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4276 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
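		/*
		 * The CPU PQ ID sits between DMA_1_3 and DMA_2_0 in the queue
		 * enumeration, so subtract 1 here (and in the cases below) to
		 * recover the stream index.
		 */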
4277 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4278 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4279 		break;
4280 
4281 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4282 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4283 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4284 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4285 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4286 		break;
4287 
4288 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4289 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4290 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4291 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4292 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4293 		break;
4294 
4295 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4296 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4297 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4298 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4299 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4300 		break;
4301 
4302 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4303 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4304 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4305 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4306 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4307 		break;
4308 
4309 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4310 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4311 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4312 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4313 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4314 		break;
4315 
4316 	case GAUDI_QUEUE_ID_CPU_PQ:
4317 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4318 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4319 		else
4320 			invalid_queue = true;
4321 		break;
4322 
4323 	case GAUDI_QUEUE_ID_MME_0_0:
4324 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4325 		break;
4326 
4327 	case GAUDI_QUEUE_ID_MME_0_1:
4328 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4329 		break;
4330 
4331 	case GAUDI_QUEUE_ID_MME_0_2:
4332 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4333 		break;
4334 
4335 	case GAUDI_QUEUE_ID_MME_0_3:
4336 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4337 		break;
4338 
4339 	case GAUDI_QUEUE_ID_MME_1_0:
4340 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4341 		break;
4342 
4343 	case GAUDI_QUEUE_ID_MME_1_1:
4344 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4345 		break;
4346 
4347 	case GAUDI_QUEUE_ID_MME_1_2:
4348 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4349 		break;
4350 
4351 	case GAUDI_QUEUE_ID_MME_1_3:
4352 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4353 		break;
4354 
4355 	case GAUDI_QUEUE_ID_TPC_0_0:
4356 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4357 		break;
4358 
4359 	case GAUDI_QUEUE_ID_TPC_0_1:
4360 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4361 		break;
4362 
4363 	case GAUDI_QUEUE_ID_TPC_0_2:
4364 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4365 		break;
4366 
4367 	case GAUDI_QUEUE_ID_TPC_0_3:
4368 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4369 		break;
4370 
4371 	case GAUDI_QUEUE_ID_TPC_1_0:
4372 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4373 		break;
4374 
4375 	case GAUDI_QUEUE_ID_TPC_1_1:
4376 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4377 		break;
4378 
4379 	case GAUDI_QUEUE_ID_TPC_1_2:
4380 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4381 		break;
4382 
4383 	case GAUDI_QUEUE_ID_TPC_1_3:
4384 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4385 		break;
4386 
4387 	case GAUDI_QUEUE_ID_TPC_2_0:
4388 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4389 		break;
4390 
4391 	case GAUDI_QUEUE_ID_TPC_2_1:
4392 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4393 		break;
4394 
4395 	case GAUDI_QUEUE_ID_TPC_2_2:
4396 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4397 		break;
4398 
4399 	case GAUDI_QUEUE_ID_TPC_2_3:
4400 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4401 		break;
4402 
4403 	case GAUDI_QUEUE_ID_TPC_3_0:
4404 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4405 		break;
4406 
4407 	case GAUDI_QUEUE_ID_TPC_3_1:
4408 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4409 		break;
4410 
4411 	case GAUDI_QUEUE_ID_TPC_3_2:
4412 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4413 		break;
4414 
4415 	case GAUDI_QUEUE_ID_TPC_3_3:
4416 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4417 		break;
4418 
4419 	case GAUDI_QUEUE_ID_TPC_4_0:
4420 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4421 		break;
4422 
4423 	case GAUDI_QUEUE_ID_TPC_4_1:
4424 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4425 		break;
4426 
4427 	case GAUDI_QUEUE_ID_TPC_4_2:
4428 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4429 		break;
4430 
4431 	case GAUDI_QUEUE_ID_TPC_4_3:
4432 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4433 		break;
4434 
4435 	case GAUDI_QUEUE_ID_TPC_5_0:
4436 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4437 		break;
4438 
4439 	case GAUDI_QUEUE_ID_TPC_5_1:
4440 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4441 		break;
4442 
4443 	case GAUDI_QUEUE_ID_TPC_5_2:
4444 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4445 		break;
4446 
4447 	case GAUDI_QUEUE_ID_TPC_5_3:
4448 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4449 		break;
4450 
4451 	case GAUDI_QUEUE_ID_TPC_6_0:
4452 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4453 		break;
4454 
4455 	case GAUDI_QUEUE_ID_TPC_6_1:
4456 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4457 		break;
4458 
4459 	case GAUDI_QUEUE_ID_TPC_6_2:
4460 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4461 		break;
4462 
4463 	case GAUDI_QUEUE_ID_TPC_6_3:
4464 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4465 		break;
4466 
4467 	case GAUDI_QUEUE_ID_TPC_7_0:
4468 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4469 		break;
4470 
4471 	case GAUDI_QUEUE_ID_TPC_7_1:
4472 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4473 		break;
4474 
4475 	case GAUDI_QUEUE_ID_TPC_7_2:
4476 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4477 		break;
4478 
4479 	case GAUDI_QUEUE_ID_TPC_7_3:
4480 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4481 		break;
4482 
4483 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4484 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4485 			invalid_queue = true;
4486 
4487 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4488 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4489 		break;
4490 
4491 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4492 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4493 			invalid_queue = true;
4494 
4495 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4496 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4497 		break;
4498 
4499 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4500 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4501 			invalid_queue = true;
4502 
4503 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4504 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4505 		break;
4506 
4507 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4508 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4509 			invalid_queue = true;
4510 
4511 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4512 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4513 		break;
4514 
4515 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4516 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4517 			invalid_queue = true;
4518 
4519 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4520 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4521 		break;
4522 
4523 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4524 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4525 			invalid_queue = true;
4526 
4527 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4528 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4529 		break;
4530 
4531 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4532 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4533 			invalid_queue = true;
4534 
4535 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4536 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4537 		break;
4538 
4539 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4540 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4541 			invalid_queue = true;
4542 
4543 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4544 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4545 		break;
4546 
4547 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4548 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4549 			invalid_queue = true;
4550 
4551 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4552 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4553 		break;
4554 
4555 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4556 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4557 			invalid_queue = true;
4558 
4559 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4560 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4561 		break;
4562 
4563 	default:
4564 		invalid_queue = true;
4565 	}
4566 
4567 	if (invalid_queue) {
4568 		/* Should never get here */
4569 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4570 			hw_queue_id);
4571 		return;
4572 	}
4573 
4574 	db_value = pi;
4575 
4576 	/* ring the doorbell */
4577 	WREG32(db_reg_offset, db_value);
4578 
4579 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4580 		/* make sure device CPU will read latest data from host */
4581 		mb();
4582 
4583 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4584 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4585 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4586 
4587 		WREG32(irq_handler_offset,
4588 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4589 	}
4590 }
4591 
4592 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4593 				struct hl_bd *bd)
4594 {
4595 	__le64 *pbd = (__le64 *) bd;
4596 
4597 	/* The QMANs are in host memory so a simple copy suffices */
4598 	pqe[0] = pbd[0];
4599 	pqe[1] = pbd[1];
4600 }
4601 
4602 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4603 					dma_addr_t *dma_handle, gfp_t flags)
4604 {
4605 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4606 						dma_handle, flags);
4607 
4608 	/* Shift to the device's base physical address of host memory */
4609 	if (kernel_addr)
4610 		*dma_handle += HOST_PHYS_BASE;
4611 
4612 	return kernel_addr;
4613 }
4614 
4615 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4616 		void *cpu_addr, dma_addr_t dma_handle)
4617 {
4618 	/* Cancel the device's base physical address of host memory */
4619 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4620 
4621 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4622 }
4623 
4624 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4625 {
4626 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4627 	u64 cur_addr = prop->dram_user_base_address;
4628 	u32 chunk_size, busy;
4629 	int rc, dma_id;
4630 
4631 	while (cur_addr < prop->dram_end_address) {
4632 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4633 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4634 
4635 			chunk_size =
4636 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4637 
4638 			dev_dbg(hdev->dev,
4639 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4640 				cur_addr, cur_addr + chunk_size);
4641 
4642 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4643 					lower_32_bits(val));
4644 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4645 					upper_32_bits(val));
4646 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4647 						lower_32_bits(cur_addr));
4648 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4649 						upper_32_bits(cur_addr));
4650 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4651 					chunk_size);
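			/*
			 * Commit a linear memset-mode transfer: with MEM_SET,
			 * the SRC_BASE registers hold the scrub pattern rather
			 * than a source address.
			 */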
4652 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4653 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4654 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4655 
4656 			cur_addr += chunk_size;
4657 
4658 			if (cur_addr == prop->dram_end_address)
4659 				break;
4660 		}
4661 
4662 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4663 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4664 
4665 			rc = hl_poll_timeout(
4666 				hdev,
4667 				mmDMA0_CORE_STS0 + dma_offset,
4668 				busy,
4669 				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4670 				1000,
4671 				HBM_SCRUBBING_TIMEOUT_US);
4672 
4673 			if (rc) {
4674 				dev_err(hdev->dev,
4675 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4676 					dma_id);
4677 				return -EIO;
4678 			}
4679 		}
4680 	}
4681 
4682 	return 0;
4683 }
4684 
4685 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4686 {
4687 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4688 	u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4689 			min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4690 	u64 addr, size, val = hdev->memory_scrub_val;
4691 	ktime_t timeout;
4692 	int rc = 0;
4693 
4694 	if (!hdev->memory_scrub)
4695 		return 0;
4696 
4697 	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4698 	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4699 		if (ktime_compare(ktime_get(), timeout) > 0) {
4700 			dev_err(hdev->dev, "waiting for idle timeout\n");
4701 			return -ETIMEDOUT;
4702 		}
4703 		usleep_range((1000 >> 2) + 1, 1000);
4704 	}
4705 
4706 	/* Scrub SRAM */
4707 	addr = prop->sram_user_base_address;
4708 	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4709 
4710 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4711 			addr, addr + size, val);
4712 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4713 	if (rc) {
4714 		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4715 		return rc;
4716 	}
4717 
4718 	/* Scrub HBM using all DMA channels in parallel */
4719 	rc = gaudi_scrub_device_dram(hdev, val);
4720 	if (rc) {
4721 		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4722 		return rc;
4723 	}
4724 
4725 	return 0;
4726 }
4727 
4728 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4729 				u32 queue_id, dma_addr_t *dma_handle,
4730 				u16 *queue_len)
4731 {
4732 	struct gaudi_device *gaudi = hdev->asic_specific;
4733 	struct gaudi_internal_qman_info *q;
4734 
4735 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4736 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4737 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4738 		return NULL;
4739 	}
4740 
4741 	q = &gaudi->internal_qmans[queue_id];
4742 	*dma_handle = q->pq_dma_addr;
4743 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4744 
4745 	return q->pq_kernel_addr;
4746 }
4747 
4748 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4749 				u16 len, u32 timeout, u64 *result)
4750 {
4751 	struct gaudi_device *gaudi = hdev->asic_specific;
4752 
4753 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4754 		if (result)
4755 			*result = 0;
4756 		return 0;
4757 	}
4758 
4759 	if (!timeout)
4760 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4761 
4762 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4763 						timeout, result);
4764 }
4765 
4766 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4767 {
4768 	struct packet_msg_prot *fence_pkt;
4769 	dma_addr_t pkt_dma_addr;
4770 	u32 fence_val, tmp, timeout_usec;
4771 	dma_addr_t fence_dma_addr;
4772 	u32 *fence_ptr;
4773 	int rc;
4774 
4775 	if (hdev->pldm)
4776 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4777 	else
4778 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4779 
4780 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4781 
4782 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4783 	if (!fence_ptr) {
4784 		dev_err(hdev->dev,
4785 			"Failed to allocate memory for H/W queue %d testing\n",
4786 			hw_queue_id);
4787 		return -ENOMEM;
4788 	}
4789 
4790 	*fence_ptr = 0;
4791 
4792 	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4793 						&pkt_dma_addr);
4794 	if (!fence_pkt) {
4795 		dev_err(hdev->dev,
4796 			"Failed to allocate packet for H/W queue %d testing\n",
4797 			hw_queue_id);
4798 		rc = -ENOMEM;
4799 		goto free_fence_ptr;
4800 	}
4801 
4802 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4803 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4804 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4805 
4806 	fence_pkt->ctl = cpu_to_le32(tmp);
4807 	fence_pkt->value = cpu_to_le32(fence_val);
4808 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4809 
4810 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4811 					sizeof(struct packet_msg_prot),
4812 					pkt_dma_addr);
4813 	if (rc) {
4814 		dev_err(hdev->dev,
4815 			"Failed to send fence packet to H/W queue %d\n",
4816 			hw_queue_id);
4817 		goto free_pkt;
4818 	}
4819 
4820 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4821 					1000, timeout_usec, true);
4822 
4823 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4824 
4825 	if (rc == -ETIMEDOUT) {
4826 		dev_err(hdev->dev,
4827 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4828 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4829 		rc = -EIO;
4830 	}
4831 
4832 free_pkt:
4833 	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4834 free_fence_ptr:
4835 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4836 	return rc;
4837 }
4838 
4839 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4840 {
4841 	struct gaudi_device *gaudi = hdev->asic_specific;
4842 
4843 	/*
4844 	 * check capability here as send_cpu_message() won't update the result
4845 	 * value if no capability
4846 	 */
4847 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4848 		return 0;
4849 
4850 	return hl_fw_test_cpu_queue(hdev);
4851 }
4852 
4853 static int gaudi_test_queues(struct hl_device *hdev)
4854 {
4855 	int i, rc, ret_val = 0;
4856 
4857 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4858 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4859 			rc = gaudi_test_queue(hdev, i);
4860 			if (rc)
4861 				ret_val = -EINVAL;
4862 		}
4863 	}
4864 
4865 	rc = gaudi_test_cpu_queue(hdev);
4866 	if (rc)
4867 		ret_val = -EINVAL;
4868 
4869 	return ret_val;
4870 }
4871 
4872 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4873 		gfp_t mem_flags, dma_addr_t *dma_handle)
4874 {
4875 	void *kernel_addr;
4876 
4877 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4878 		return NULL;
4879 
4880 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4881 
4882 	/* Shift to the device's base physical address of host memory */
4883 	if (kernel_addr)
4884 		*dma_handle += HOST_PHYS_BASE;
4885 
4886 	return kernel_addr;
4887 }
4888 
4889 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4890 			dma_addr_t dma_addr)
4891 {
4892 	/* Cancel the device's base physical address of host memory */
4893 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4894 
4895 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4896 }
4897 
4898 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4899 					size_t size, dma_addr_t *dma_handle)
4900 {
4901 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4902 }
4903 
4904 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4905 						size_t size, void *vaddr)
4906 {
4907 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4908 }
4909 
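/*
 * gaudi_get_dma_desc_list_size() - worst-case size of the patched DMA packets.
 *
 * Walk the DMA-mapped SG table and count how many LIN_DMA packets are needed
 * once physically contiguous entries are merged, as long as the merged length
 * does not exceed DMA_MAX_TRANSFER_SIZE. Return the number of bytes these
 * packets will occupy in the patched CB.
 */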
4910 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4911 {
4912 	struct scatterlist *sg, *sg_next_iter;
4913 	u32 count, dma_desc_cnt;
4914 	u64 len, len_next;
4915 	dma_addr_t addr, addr_next;
4916 
4917 	dma_desc_cnt = 0;
4918 
4919 	for_each_sgtable_dma_sg(sgt, sg, count) {
4920 		len = sg_dma_len(sg);
4921 		addr = sg_dma_address(sg);
4922 
4923 		if (len == 0)
4924 			break;
4925 
4926 		while ((count + 1) < sgt->nents) {
4927 			sg_next_iter = sg_next(sg);
4928 			len_next = sg_dma_len(sg_next_iter);
4929 			addr_next = sg_dma_address(sg_next_iter);
4930 
4931 			if (len_next == 0)
4932 				break;
4933 
4934 			if ((addr + len == addr_next) &&
4935 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4936 				len += len_next;
4937 				count++;
4938 				sg = sg_next_iter;
4939 			} else {
4940 				break;
4941 			}
4942 		}
4943 
4944 		dma_desc_cnt++;
4945 	}
4946 
4947 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4948 }
4949 
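/*
 * gaudi_pin_memory_before_cs() - pin the host buffer referenced by a user
 * LIN_DMA packet and DMA-map it, unless it is already pinned for this job.
 * In either case, grow the patched CB size by the room needed for the packets
 * that will replace this single user packet.
 */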
4950 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4951 				struct hl_cs_parser *parser,
4952 				struct packet_lin_dma *user_dma_pkt,
4953 				u64 addr, enum dma_data_direction dir)
4954 {
4955 	struct hl_userptr *userptr;
4956 	int rc;
4957 
4958 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4959 			parser->job_userptr_list, &userptr))
4960 		goto already_pinned;
4961 
4962 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4963 	if (!userptr)
4964 		return -ENOMEM;
4965 
4966 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4967 				userptr);
4968 	if (rc)
4969 		goto free_userptr;
4970 
4971 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4972 
4973 	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4974 	if (rc) {
4975 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4976 		goto unpin_memory;
4977 	}
4978 
4979 	userptr->dma_mapped = true;
4980 	userptr->dir = dir;
4981 
4982 already_pinned:
4983 	parser->patched_cb_size +=
4984 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4985 
4986 	return 0;
4987 
4988 unpin_memory:
4989 	list_del(&userptr->job_node);
4990 	hl_unpin_host_memory(hdev, userptr);
4991 free_userptr:
4992 	kfree(userptr);
4993 	return rc;
4994 }
4995 
4996 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4997 				struct hl_cs_parser *parser,
4998 				struct packet_lin_dma *user_dma_pkt,
4999 				bool src_in_host)
5000 {
5001 	enum dma_data_direction dir;
5002 	bool skip_host_mem_pin = false, user_memset;
5003 	u64 addr;
5004 	int rc = 0;
5005 
5006 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5007 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5008 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5009 
5010 	if (src_in_host) {
5011 		if (user_memset)
5012 			skip_host_mem_pin = true;
5013 
5014 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5015 		dir = DMA_TO_DEVICE;
5016 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5017 	} else {
5018 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5019 		dir = DMA_FROM_DEVICE;
5020 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5021 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5022 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5023 	}
5024 
5025 	if (skip_host_mem_pin)
5026 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5027 	else
5028 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5029 						addr, dir);
5030 
5031 	return rc;
5032 }
5033 
5034 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5035 				struct hl_cs_parser *parser,
5036 				struct packet_lin_dma *user_dma_pkt)
5037 {
5038 	bool src_in_host = false;
5039 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5040 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5041 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5042 
5043 	dev_dbg(hdev->dev, "DMA packet details:\n");
5044 	dev_dbg(hdev->dev, "source == 0x%llx\n",
5045 				le64_to_cpu(user_dma_pkt->src_addr));
5046 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5047 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5048 
5049 	/*
5050 	 * Special handling for DMA with size 0. Bypass all validations
5051 	 * because no transactions will be done except for WR_COMP, which
5052 	 * is not a security issue
5053 	 */
5054 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
5055 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5056 		return 0;
5057 	}
5058 
5059 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5060 		src_in_host = true;
5061 
5062 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5063 						src_in_host);
5064 }
5065 
5066 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5067 					struct hl_cs_parser *parser,
5068 					struct packet_load_and_exe *user_pkt)
5069 {
5070 	u32 cfg;
5071 
5072 	cfg = le32_to_cpu(user_pkt->cfg);
5073 
5074 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5075 		dev_err(hdev->dev,
5076 			"User not allowed to use Load and Execute\n");
5077 		return -EPERM;
5078 	}
5079 
5080 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5081 
5082 	return 0;
5083 }
5084 
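/*
 * gaudi_validate_cb() - first parsing pass over the user CB.
 *
 * Walk the user CB packet by packet, reject packet types that user space is
 * not allowed to submit, and accumulate the size of the patched CB that the
 * patching pass will produce.
 */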
5085 static int gaudi_validate_cb(struct hl_device *hdev,
5086 			struct hl_cs_parser *parser, bool is_mmu)
5087 {
5088 	u32 cb_parsed_length = 0;
5089 	int rc = 0;
5090 
5091 	parser->patched_cb_size = 0;
5092 
5093 	/* user_cb_size is more than 0 so the loop will always be executed */
5094 	while (cb_parsed_length < parser->user_cb_size) {
5095 		enum packet_id pkt_id;
5096 		u16 pkt_size;
5097 		struct gaudi_packet *user_pkt;
5098 
5099 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5100 
5101 		pkt_id = (enum packet_id) (
5102 				(le64_to_cpu(user_pkt->header) &
5103 				PACKET_HEADER_PACKET_ID_MASK) >>
5104 					PACKET_HEADER_PACKET_ID_SHIFT);
5105 
5106 		if (!validate_packet_id(pkt_id)) {
5107 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5108 			rc = -EINVAL;
5109 			break;
5110 		}
5111 
5112 		pkt_size = gaudi_packet_sizes[pkt_id];
5113 		cb_parsed_length += pkt_size;
5114 		if (cb_parsed_length > parser->user_cb_size) {
5115 			dev_err(hdev->dev,
5116 				"packet 0x%x is out of CB boundary\n", pkt_id);
5117 			rc = -EINVAL;
5118 			break;
5119 		}
5120 
5121 		switch (pkt_id) {
5122 		case PACKET_MSG_PROT:
5123 			dev_err(hdev->dev,
5124 				"User not allowed to use MSG_PROT\n");
5125 			rc = -EPERM;
5126 			break;
5127 
5128 		case PACKET_CP_DMA:
5129 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5130 			rc = -EPERM;
5131 			break;
5132 
5133 		case PACKET_STOP:
5134 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5135 			rc = -EPERM;
5136 			break;
5137 
5138 		case PACKET_WREG_BULK:
5139 			dev_err(hdev->dev,
5140 				"User not allowed to use WREG_BULK\n");
5141 			rc = -EPERM;
5142 			break;
5143 
5144 		case PACKET_LOAD_AND_EXE:
5145 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5146 				(struct packet_load_and_exe *) user_pkt);
5147 			break;
5148 
5149 		case PACKET_LIN_DMA:
5150 			parser->contains_dma_pkt = true;
5151 			if (is_mmu)
5152 				parser->patched_cb_size += pkt_size;
5153 			else
5154 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5155 					(struct packet_lin_dma *) user_pkt);
5156 			break;
5157 
5158 		case PACKET_WREG_32:
5159 		case PACKET_MSG_LONG:
5160 		case PACKET_MSG_SHORT:
5161 		case PACKET_REPEAT:
5162 		case PACKET_FENCE:
5163 		case PACKET_NOP:
5164 		case PACKET_ARB_POINT:
5165 			parser->patched_cb_size += pkt_size;
5166 			break;
5167 
5168 		default:
5169 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5170 				pkt_id);
5171 			rc = -EINVAL;
5172 			break;
5173 		}
5174 
5175 		if (rc)
5176 			break;
5177 	}
5178 
5179 	/*
5180 	 * The new CB should have space at the end for:
5181 	 * 1. Optional NOP padding for cacheline alignment
5182 	 * 2. A MSG_PROT packet that will act as a completion packet
5183 	 * 3. A MSG_PROT packet that will generate an MSI interrupt
5184 	 */
5185 	if (parser->completion)
5186 		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5187 			parser->patched_cb_size);
5188 
5189 	return rc;
5190 }
5191 
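/*
 * gaudi_patch_dma_packet() - expand one user LIN_DMA packet into a chain of
 * LIN_DMA packets, one per (merged) DMA-mapped SG entry. The engine-barrier
 * bit is kept only on the first packet, and the user's write-completion
 * setting is restored only on the last one.
 */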
5192 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5193 				struct hl_cs_parser *parser,
5194 				struct packet_lin_dma *user_dma_pkt,
5195 				struct packet_lin_dma *new_dma_pkt,
5196 				u32 *new_dma_pkt_size)
5197 {
5198 	struct hl_userptr *userptr;
5199 	struct scatterlist *sg, *sg_next_iter;
5200 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5201 	u64 len, len_next;
5202 	dma_addr_t dma_addr, dma_addr_next;
5203 	u64 device_memory_addr, addr;
5204 	enum dma_data_direction dir;
5205 	struct sg_table *sgt;
5206 	bool src_in_host = false;
5207 	bool skip_host_mem_pin = false;
5208 	bool user_memset;
5209 
5210 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5211 
5212 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5213 		src_in_host = true;
5214 
5215 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5216 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5217 
5218 	if (src_in_host) {
5219 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5220 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5221 		dir = DMA_TO_DEVICE;
5222 		if (user_memset)
5223 			skip_host_mem_pin = true;
5224 	} else {
5225 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5226 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5227 		dir = DMA_FROM_DEVICE;
5228 	}
5229 
5230 	if ((!skip_host_mem_pin) &&
5231 		(!hl_userptr_is_pinned(hdev, addr,
5232 					le32_to_cpu(user_dma_pkt->tsize),
5233 					parser->job_userptr_list, &userptr))) {
5234 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5235 				addr, le32_to_cpu(user_dma_pkt->tsize));
5236 		return -EFAULT;
5237 	}
5238 
5239 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5240 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5241 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5242 		return 0;
5243 	}
5244 
5245 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5246 
5247 	sgt = userptr->sgt;
5248 	dma_desc_cnt = 0;
5249 
5250 	for_each_sgtable_dma_sg(sgt, sg, count) {
5251 		len = sg_dma_len(sg);
5252 		dma_addr = sg_dma_address(sg);
5253 
5254 		if (len == 0)
5255 			break;
5256 
5257 		while ((count + 1) < sgt->nents) {
5258 			sg_next_iter = sg_next(sg);
5259 			len_next = sg_dma_len(sg_next_iter);
5260 			dma_addr_next = sg_dma_address(sg_next_iter);
5261 
5262 			if (len_next == 0)
5263 				break;
5264 
5265 			if ((dma_addr + len == dma_addr_next) &&
5266 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5267 				len += len_next;
5268 				count++;
5269 				sg = sg_next_iter;
5270 			} else {
5271 				break;
5272 			}
5273 		}
5274 
5275 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5276 		if (likely(dma_desc_cnt))
5277 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5278 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5279 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5280 		new_dma_pkt->tsize = cpu_to_le32(len);
5281 
5282 		if (dir == DMA_TO_DEVICE) {
5283 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5284 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5285 		} else {
5286 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5287 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5288 		}
5289 
5290 		if (!user_memset)
5291 			device_memory_addr += len;
5292 		dma_desc_cnt++;
5293 		new_dma_pkt++;
5294 	}
5295 
5296 	if (!dma_desc_cnt) {
5297 		dev_err(hdev->dev,
5298 			"No SG entries found when patching DMA packet\n");
5299 		return -EFAULT;
5300 	}
5301 
5302 	/* Fix the last dma packet - wrcomp must be as user set it */
5303 	new_dma_pkt--;
5304 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5305 
5306 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5307 
5308 	return 0;
5309 }
5310 
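/*
 * gaudi_patch_cb() - second parsing pass: copy the user CB into the patched
 * CB, expanding LIN_DMA packets as needed and rejecting forbidden packets.
 */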
5311 static int gaudi_patch_cb(struct hl_device *hdev,
5312 				struct hl_cs_parser *parser)
5313 {
5314 	u32 cb_parsed_length = 0;
5315 	u32 cb_patched_cur_length = 0;
5316 	int rc = 0;
5317 
5318 	/* user_cb_size is more than 0 so the loop will always be executed */
5319 	while (cb_parsed_length < parser->user_cb_size) {
5320 		enum packet_id pkt_id;
5321 		u16 pkt_size;
5322 		u32 new_pkt_size = 0;
5323 		struct gaudi_packet *user_pkt, *kernel_pkt;
5324 
5325 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5326 		kernel_pkt = parser->patched_cb->kernel_address +
5327 					cb_patched_cur_length;
5328 
5329 		pkt_id = (enum packet_id) (
5330 				(le64_to_cpu(user_pkt->header) &
5331 				PACKET_HEADER_PACKET_ID_MASK) >>
5332 					PACKET_HEADER_PACKET_ID_SHIFT);
5333 
5334 		if (!validate_packet_id(pkt_id)) {
5335 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5336 			rc = -EINVAL;
5337 			break;
5338 		}
5339 
5340 		pkt_size = gaudi_packet_sizes[pkt_id];
5341 		cb_parsed_length += pkt_size;
5342 		if (cb_parsed_length > parser->user_cb_size) {
5343 			dev_err(hdev->dev,
5344 				"packet 0x%x is out of CB boundary\n", pkt_id);
5345 			rc = -EINVAL;
5346 			break;
5347 		}
5348 
5349 		switch (pkt_id) {
5350 		case PACKET_LIN_DMA:
5351 			rc = gaudi_patch_dma_packet(hdev, parser,
5352 					(struct packet_lin_dma *) user_pkt,
5353 					(struct packet_lin_dma *) kernel_pkt,
5354 					&new_pkt_size);
5355 			cb_patched_cur_length += new_pkt_size;
5356 			break;
5357 
5358 		case PACKET_MSG_PROT:
5359 			dev_err(hdev->dev,
5360 				"User not allowed to use MSG_PROT\n");
5361 			rc = -EPERM;
5362 			break;
5363 
5364 		case PACKET_CP_DMA:
5365 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5366 			rc = -EPERM;
5367 			break;
5368 
5369 		case PACKET_STOP:
5370 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5371 			rc = -EPERM;
5372 			break;
5373 
5374 		case PACKET_WREG_32:
5375 		case PACKET_WREG_BULK:
5376 		case PACKET_MSG_LONG:
5377 		case PACKET_MSG_SHORT:
5378 		case PACKET_REPEAT:
5379 		case PACKET_FENCE:
5380 		case PACKET_NOP:
5381 		case PACKET_ARB_POINT:
5382 		case PACKET_LOAD_AND_EXE:
5383 			memcpy(kernel_pkt, user_pkt, pkt_size);
5384 			cb_patched_cur_length += pkt_size;
5385 			break;
5386 
5387 		default:
5388 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5389 				pkt_id);
5390 			rc = -EINVAL;
5391 			break;
5392 		}
5393 
5394 		if (rc)
5395 			break;
5396 	}
5397 
5398 	return rc;
5399 }
5400 
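/*
 * gaudi_parse_cb_mmu() - parse a user CB when the MMU is enabled.
 *
 * With the MMU enabled there is nothing to patch in the DMA packets, so the
 * user CB is copied as-is into a kernel-allocated CB that only adds room for
 * the end-of-CB packets, and is then validated in place.
 */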
5401 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5402 		struct hl_cs_parser *parser)
5403 {
5404 	u64 handle;
5405 	u32 patched_cb_size;
5406 	struct hl_cb *user_cb;
5407 	int rc;
5408 
5409 	/*
5410 	 * The new CB should have space at the end for:
5411 	 * 1. Optional NOP padding for cacheline alignment
5412 	 * 2. A MSG_PROT packet that will act as a completion packet
5413 	 * 3. A MSG_PROT packet that will generate an MSI interrupt
5414 	 */
5415 	if (parser->completion)
5416 		parser->patched_cb_size = parser->user_cb_size +
5417 				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5418 	else
5419 		parser->patched_cb_size = parser->user_cb_size;
5420 
5421 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5422 				parser->patched_cb_size, false, false,
5423 				&handle);
5424 
5425 	if (rc) {
5426 		dev_err(hdev->dev,
5427 			"Failed to allocate patched CB for DMA CS %d\n",
5428 			rc);
5429 		return rc;
5430 	}
5431 
5432 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5433 	/* hl_cb_get should never fail */
5434 	if (!parser->patched_cb) {
5435 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5436 		rc = -EFAULT;
5437 		goto out;
5438 	}
5439 
5440 	/*
5441 	 * We are protected from overflow because the check
5442 	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5443 	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5444 	 *
5445 	 * There is no option to reach here without going through that check because:
5446 	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5447 	 *    an external queue.
5448 	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5449 	 */
5450 	memcpy(parser->patched_cb->kernel_address,
5451 		parser->user_cb->kernel_address,
5452 		parser->user_cb_size);
5453 
5454 	patched_cb_size = parser->patched_cb_size;
5455 
5456 	/* Validate patched CB instead of user CB */
5457 	user_cb = parser->user_cb;
5458 	parser->user_cb = parser->patched_cb;
5459 	rc = gaudi_validate_cb(hdev, parser, true);
5460 	parser->user_cb = user_cb;
5461 
5462 	if (rc) {
5463 		hl_cb_put(parser->patched_cb);
5464 		goto out;
5465 	}
5466 
5467 	if (patched_cb_size != parser->patched_cb_size) {
5468 		dev_err(hdev->dev, "patched CB size mismatch\n");
5469 		hl_cb_put(parser->patched_cb);
5470 		rc = -EINVAL;
5471 		goto out;
5472 	}
5473 
5474 out:
5475 	/*
5476 	 * Always call cb destroy here because we still hold one reference
5477 	 * to it from the earlier cb_get. After the job is completed,
5478 	 * cb_put will release it, but here we want to remove it from the
5479 	 * idr
5480 	 */
5481 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5482 
5483 	return rc;
5484 }
5485 
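/*
 * gaudi_parse_cb_no_mmu() - parse a user CB when the MMU is disabled.
 * Validate the user CB, allocate a patched CB of the computed size and fill
 * it by patching the DMA packets with pinned host addresses.
 */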
5486 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5487 		struct hl_cs_parser *parser)
5488 {
5489 	u64 handle;
5490 	int rc;
5491 
5492 	rc = gaudi_validate_cb(hdev, parser, false);
5493 
5494 	if (rc)
5495 		goto free_userptr;
5496 
5497 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5498 				parser->patched_cb_size, false, false,
5499 				&handle);
5500 	if (rc) {
5501 		dev_err(hdev->dev,
5502 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5503 		goto free_userptr;
5504 	}
5505 
5506 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5507 	/* hl_cb_get should never fail here */
5508 	if (!parser->patched_cb) {
5509 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5510 		rc = -EFAULT;
5511 		goto out;
5512 	}
5513 
5514 	rc = gaudi_patch_cb(hdev, parser);
5515 
5516 	if (rc)
5517 		hl_cb_put(parser->patched_cb);
5518 
5519 out:
5520 	/*
5521 	 * Always call cb destroy here because we still hold one reference
5522 	 * to it from the earlier cb_get. After the job is completed,
5523 	 * cb_put will release it, but here we want to remove it from the
5524 	 * idr
5525 	 */
5526 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5527 
5528 free_userptr:
5529 	if (rc)
5530 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5531 	return rc;
5532 }
5533 
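/*
 * gaudi_parse_cb_no_ext_queue() - for internal queues there is nothing to
 * patch; only verify that the target queue is enabled and that the CB resides
 * entirely in SRAM, DRAM or the host range covered by the PMMU.
 */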
5534 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5535 					struct hl_cs_parser *parser)
5536 {
5537 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5538 	struct gaudi_device *gaudi = hdev->asic_specific;
5539 	u32 nic_queue_offset, nic_mask_q_id;
5540 
5541 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5542 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5543 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5544 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5545 
5546 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5547 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5548 			return -EINVAL;
5549 		}
5550 	}
5551 
5552 	/* For internal queue jobs just check if CB address is valid */
5553 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5554 					parser->user_cb_size,
5555 					asic_prop->sram_user_base_address,
5556 					asic_prop->sram_end_address))
5557 		return 0;
5558 
5559 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5560 					parser->user_cb_size,
5561 					asic_prop->dram_user_base_address,
5562 					asic_prop->dram_end_address))
5563 		return 0;
5564 
5565 	/* PMMU and HPMMU addresses are equal, check only one of them */
5566 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5567 					parser->user_cb_size,
5568 					asic_prop->pmmu.start_addr,
5569 					asic_prop->pmmu.end_addr))
5570 		return 0;
5571 
5572 	dev_err(hdev->dev,
5573 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5574 		parser->user_cb, parser->user_cb_size);
5575 
5576 	return -EFAULT;
5577 }
5578 
5579 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5580 {
5581 	struct gaudi_device *gaudi = hdev->asic_specific;
5582 
5583 	if (parser->queue_type == QUEUE_TYPE_INT)
5584 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5585 
5586 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5587 		return gaudi_parse_cb_mmu(hdev, parser);
5588 	else
5589 		return gaudi_parse_cb_no_mmu(hdev, parser);
5590 }
5591 
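/*
 * gaudi_add_end_of_cb_packets() - append the end-of-CB packets.
 * Pad from the end of the original CB up to the last two entries with NOP
 * packets, then add one MSG_PROT packet that writes the completion value to
 * the CQ and a second one that triggers the MSI interrupt.
 */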
5592 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5593 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5594 				u32 msi_vec, bool eb)
5595 {
5596 	struct gaudi_device *gaudi = hdev->asic_specific;
5597 	struct packet_msg_prot *cq_pkt;
5598 	struct packet_nop *cq_padding;
5599 	u64 msi_addr;
5600 	u32 tmp;
5601 
5602 	cq_padding = kernel_address + original_len;
5603 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5604 
5605 	while ((void *)cq_padding < (void *)cq_pkt) {
5606 		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5607 		cq_padding++;
5608 	}
5609 
5610 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5611 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5612 
5613 	if (eb)
5614 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5615 
5616 	cq_pkt->ctl = cpu_to_le32(tmp);
5617 	cq_pkt->value = cpu_to_le32(cq_val);
5618 	cq_pkt->addr = cpu_to_le64(cq_addr);
5619 
5620 	cq_pkt++;
5621 
5622 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5623 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5624 	cq_pkt->ctl = cpu_to_le32(tmp);
5625 	cq_pkt->value = cpu_to_le32(1);
5626 
5627 	if (gaudi->multi_msi_mode)
5628 		msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5629 	else
5630 		msi_addr = mmPCIE_CORE_MSI_REQ;
5631 
5632 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5633 }
5634 
5635 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5636 {
5637 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5638 }
5639 
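/*
 * gaudi_memset_device_memory() - fill a device memory range with a value by
 * submitting a single memset LIN_DMA packet on the PCI DMA queue (QMAN0).
 * The DMA0 error cause is checked before and after the transfer.
 */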
5640 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5641 					u32 size, u64 val)
5642 {
5643 	struct packet_lin_dma *lin_dma_pkt;
5644 	struct hl_cs_job *job;
5645 	u32 cb_size, ctl, err_cause;
5646 	struct hl_cb *cb;
5647 	int rc;
5648 
5649 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5650 	if (!cb)
5651 		return -EFAULT;
5652 
5653 	lin_dma_pkt = cb->kernel_address;
5654 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5655 	cb_size = sizeof(*lin_dma_pkt);
5656 
5657 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5658 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5659 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5660 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5661 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5662 
5663 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5664 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5665 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5666 	lin_dma_pkt->tsize = cpu_to_le32(size);
5667 
5668 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5669 	if (!job) {
5670 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5671 		rc = -ENOMEM;
5672 		goto release_cb;
5673 	}
5674 
5675 	/* Verify DMA is OK */
5676 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5677 	if (err_cause && !hdev->init_done) {
5678 		dev_dbg(hdev->dev,
5679 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5680 			err_cause);
5681 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5682 	}
5683 
5684 	job->id = 0;
5685 	job->user_cb = cb;
5686 	atomic_inc(&job->user_cb->cs_cnt);
5687 	job->user_cb_size = cb_size;
5688 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5689 	job->patched_cb = job->user_cb;
5690 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5691 
5692 	hl_debugfs_add_job(hdev, job);
5693 
5694 	rc = gaudi_send_job_on_qman0(hdev, job);
5695 	hl_debugfs_remove_job(hdev, job);
5696 	kfree(job);
5697 	atomic_dec(&cb->cs_cnt);
5698 
5699 	/* Verify DMA is OK */
5700 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5701 	if (err_cause) {
5702 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5703 		rc = -EIO;
5704 		if (!hdev->init_done) {
5705 			dev_dbg(hdev->dev,
5706 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5707 				err_cause);
5708 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5709 		}
5710 	}
5711 
5712 release_cb:
5713 	hl_cb_put(cb);
5714 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5715 
5716 	return rc;
5717 }
5718 
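/*
 * gaudi_memset_registers() - write the same value to num_regs consecutive
 * registers by building a CB of MSG_LONG packets and submitting it on QMAN0.
 */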
5719 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5720 					u32 num_regs, u32 val)
5721 {
5722 	struct packet_msg_long *pkt;
5723 	struct hl_cs_job *job;
5724 	u32 cb_size, ctl;
5725 	struct hl_cb *cb;
5726 	int i, rc;
5727 
5728 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5729 
5730 	if (cb_size > SZ_2M) {
5731 		dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5732 		return -ENOMEM;
5733 	}
5734 
5735 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5736 	if (!cb)
5737 		return -EFAULT;
5738 
5739 	pkt = cb->kernel_address;
5740 
5741 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5742 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5743 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5744 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5745 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5746 
5747 	for (i = 0; i < num_regs ; i++, pkt++) {
5748 		pkt->ctl = cpu_to_le32(ctl);
5749 		pkt->value = cpu_to_le32(val);
5750 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5751 	}
5752 
5753 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5754 	if (!job) {
5755 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5756 		rc = -ENOMEM;
5757 		goto release_cb;
5758 	}
5759 
5760 	job->id = 0;
5761 	job->user_cb = cb;
5762 	atomic_inc(&job->user_cb->cs_cnt);
5763 	job->user_cb_size = cb_size;
5764 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5765 	job->patched_cb = job->user_cb;
5766 	job->job_cb_size = cb_size;
5767 
5768 	hl_debugfs_add_job(hdev, job);
5769 
5770 	rc = gaudi_send_job_on_qman0(hdev, job);
5771 	hl_debugfs_remove_job(hdev, job);
5772 	kfree(job);
5773 	atomic_dec(&cb->cs_cnt);
5774 
5775 release_cb:
5776 	hl_cb_put(cb);
5777 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5778 
5779 	return rc;
5780 }
5781 
5782 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5783 {
5784 	u64 base_addr;
5785 	u32 num_regs;
5786 	int rc;
5787 
5788 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5789 	num_regs = NUM_OF_SOB_IN_BLOCK;
5790 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5791 	if (rc) {
5792 		dev_err(hdev->dev, "failed resetting SM registers\n");
5793 		return -ENOMEM;
5794 	}
5795 
5796 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5797 	num_regs = NUM_OF_SOB_IN_BLOCK;
5798 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5799 	if (rc) {
5800 		dev_err(hdev->dev, "failed resetting SM registers\n");
5801 		return -ENOMEM;
5802 	}
5803 
5804 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5805 	num_regs = NUM_OF_SOB_IN_BLOCK;
5806 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5807 	if (rc) {
5808 		dev_err(hdev->dev, "failed resetting SM registers\n");
5809 		return -ENOMEM;
5810 	}
5811 
5812 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5813 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5814 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5815 	if (rc) {
5816 		dev_err(hdev->dev, "failed resetting SM registers\n");
5817 		return -ENOMEM;
5818 	}
5819 
5820 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5821 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5822 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5823 	if (rc) {
5824 		dev_err(hdev->dev, "failed resetting SM registers\n");
5825 		return -ENOMEM;
5826 	}
5827 
5828 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5829 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5830 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5831 	if (rc) {
5832 		dev_err(hdev->dev, "failed resetting SM registers\n");
5833 		return -ENOMEM;
5834 	}
5835 
5836 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5837 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5838 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5839 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5840 	if (rc) {
5841 		dev_err(hdev->dev, "failed resetting SM registers\n");
5842 		return -ENOMEM;
5843 	}
5844 
5845 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5846 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5847 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5848 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5849 	if (rc) {
5850 		dev_err(hdev->dev, "failed resetting SM registers\n");
5851 		return -ENOMEM;
5852 	}
5853 
5854 	return 0;
5855 }
5856 
5857 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5858 {
5859 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5860 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5861 	int i;
5862 
5863 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5864 		u64 sob_addr = CFG_BASE +
5865 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5866 				(i * sob_delta);
5867 		u32 dma_offset = i * DMA_CORE_OFFSET;
5868 
5869 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5870 				lower_32_bits(sob_addr));
5871 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5872 				upper_32_bits(sob_addr));
5873 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5874 
5875 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5876 		 * modified by the user for SRAM reduction
5877 		 */
5878 		if (i > 1)
5879 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5880 								0x00000001);
5881 	}
5882 }
5883 
5884 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5885 {
5886 	u32 qman_offset;
5887 	int i;
5888 
5889 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5890 		qman_offset = i * DMA_QMAN_OFFSET;
5891 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5892 	}
5893 
5894 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5895 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5896 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5897 	}
5898 
5899 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5900 		qman_offset = i * TPC_QMAN_OFFSET;
5901 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5902 	}
5903 
5904 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5905 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5906 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5907 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5908 	}
5909 }
5910 
5911 static int gaudi_restore_user_registers(struct hl_device *hdev)
5912 {
5913 	int rc;
5914 
5915 	rc = gaudi_restore_sm_registers(hdev);
5916 	if (rc)
5917 		return rc;
5918 
5919 	gaudi_restore_dma_registers(hdev);
5920 	gaudi_restore_qm_registers(hdev);
5921 
5922 	return 0;
5923 }
5924 
5925 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5926 {
5927 	return 0;
5928 }
5929 
5930 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5931 {
5932 	u32 size = hdev->asic_prop.mmu_pgt_size +
5933 			hdev->asic_prop.mmu_cache_mng_size;
5934 	struct gaudi_device *gaudi = hdev->asic_specific;
5935 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5936 
5937 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5938 		return 0;
5939 
5940 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5941 }
5942 
5943 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5944 {
5945 
5946 }
5947 
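/*
 * gaudi_dma_core_transfer() - program one DMA core to copy size_to_dma bytes
 * from a device address to a host DMA buffer and poll until the engine is no
 * longer busy.
 */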
5948 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5949 					u32 size_to_dma, dma_addr_t dma_addr)
5950 {
5951 	u32 err_cause, val;
5952 	u64 dma_offset;
5953 	int rc;
5954 
5955 	dma_offset = dma_id * DMA_CORE_OFFSET;
5956 
5957 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5958 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5959 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5960 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5961 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5962 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5963 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5964 
5965 	rc = hl_poll_timeout(
5966 		hdev,
5967 		mmDMA0_CORE_STS0 + dma_offset,
5968 		val,
5969 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5970 		0,
5971 		1000000);
5972 
5973 	if (rc) {
5974 		dev_err(hdev->dev,
5975 			"DMA %d timed out while reading from 0x%llx\n",
5976 			dma_id, addr);
5977 		return -EIO;
5978 	}
5979 
5980 	/* Verify DMA is OK */
5981 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5982 	if (err_cause) {
5983 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5984 		dev_dbg(hdev->dev,
5985 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5986 			err_cause);
5987 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5988 
5989 		return -EIO;
5990 	}
5991 
5992 	return 0;
5993 }
5994 
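/*
 * gaudi_debugfs_read_dma() - read device memory into a host blob using one of
 * the PCI DMA engines. The chosen engine must be idle; the transfer is done
 * in 2MB chunks through a temporary coherent buffer while the QMAN CPs are
 * stopped.
 */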
5995 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5996 				void *blob_addr)
5997 {
5998 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5999 	u32 qm_glbl_sts0, qm_cgm_sts;
6000 	u64 dma_offset, qm_offset;
6001 	dma_addr_t dma_addr;
6002 	void *kernel_addr;
6003 	bool is_eng_idle;
6004 	int rc = 0, dma_id;
6005 
6006 	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
6007 
6008 	if (!kernel_addr)
6009 		return -ENOMEM;
6010 
6011 	hdev->asic_funcs->hw_queues_lock(hdev);
6012 
6013 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6014 	dma_offset = dma_id * DMA_CORE_OFFSET;
6015 	qm_offset = dma_id * DMA_QMAN_OFFSET;
6016 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6017 	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6018 	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6019 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6020 		      IS_DMA_IDLE(dma_core_sts0);
6021 
6022 	if (!is_eng_idle) {
6023 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6024 		dma_offset = dma_id * DMA_CORE_OFFSET;
6025 		qm_offset = dma_id * DMA_QMAN_OFFSET;
6026 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6027 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6028 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6029 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6030 			      IS_DMA_IDLE(dma_core_sts0);
6031 
6032 		if (!is_eng_idle) {
6033 			dev_err_ratelimited(hdev->dev,
6034 				"Can't read via DMA because it is BUSY\n");
6035 			rc = -EAGAIN;
6036 			goto out;
6037 		}
6038 	}
6039 
6040 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6041 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6042 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6043 
6044 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6045 	 * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6046 	 * ASID
6047 	 */
6048 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6049 
6050 	/* Verify DMA is OK */
6051 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6052 	if (err_cause) {
6053 		dev_dbg(hdev->dev,
6054 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
6055 			err_cause);
6056 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6057 	}
6058 
6059 	pos = 0;
6060 	size_left = size;
6061 	size_to_dma = SZ_2M;
6062 
6063 	while (size_left > 0) {
6064 
6065 		if (size_left < SZ_2M)
6066 			size_to_dma = size_left;
6067 
6068 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6069 						dma_addr);
6070 		if (rc)
6071 			break;
6072 
6073 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6074 
6075 		if (size_left <= SZ_2M)
6076 			break;
6077 
6078 		pos += SZ_2M;
6079 		addr += SZ_2M;
6080 		size_left -= SZ_2M;
6081 	}
6082 
6083 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6084 	 * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6085 	 * ASID
6086 	 */
6087 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6088 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6089 
6090 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6091 
6092 out:
6093 	hdev->asic_funcs->hw_queues_unlock(hdev);
6094 
6095 	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6096 
6097 	return rc;
6098 }
6099 
6100 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6101 {
6102 	struct gaudi_device *gaudi = hdev->asic_specific;
6103 
6104 	if (hdev->reset_info.hard_reset_pending)
6105 		return U64_MAX;
6106 
6107 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6108 			(addr - gaudi->hbm_bar_cur_addr));
6109 }
6110 
6111 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6112 {
6113 	struct gaudi_device *gaudi = hdev->asic_specific;
6114 
6115 	if (hdev->reset_info.hard_reset_pending)
6116 		return;
6117 
6118 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6119 			(addr - gaudi->hbm_bar_cur_addr));
6120 }
6121 
6122 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6123 {
6124 	/* mask to zero the MMBP and ASID bits */
6125 	WREG32_AND(reg, ~0x7FF);
6126 	WREG32_OR(reg, asid);
6127 }
6128 
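/*
 * gaudi_mmu_prepare() - program the given ASID into the NON_SECURE_PROPS and
 * AxUSER registers of all engines, so their transactions are translated by
 * the MMU in the context of that ASID. NIC QMANs are configured only if the
 * corresponding NIC capability was initialized.
 */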
6129 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6130 {
6131 	struct gaudi_device *gaudi = hdev->asic_specific;
6132 
6133 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6134 		return;
6135 
6136 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6137 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6138 		return;
6139 	}
6140 
6141 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6142 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6143 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6144 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6145 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6146 
6147 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6148 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6149 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6150 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6151 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6152 
6153 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6154 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6155 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6156 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6157 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6158 
6159 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6160 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6161 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6162 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6163 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6164 
6165 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6166 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6167 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6168 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6169 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6170 
6171 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6172 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6173 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6174 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6175 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6176 
6177 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6178 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6179 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6180 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6181 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6182 
6183 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6184 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6185 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6186 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6187 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6188 
6189 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6190 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6191 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6192 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6193 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6194 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6195 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6196 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6197 
6198 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6199 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6200 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6201 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6202 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6203 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6204 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6205 
6206 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6207 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6208 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6209 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6210 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6211 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6212 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6213 
6214 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6215 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6216 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6217 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6218 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6219 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6220 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6221 
6222 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6223 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6224 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6225 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6226 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6227 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6228 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6229 
6230 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6231 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6232 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6233 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6234 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6235 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6236 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6237 
6238 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6239 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6240 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6241 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6242 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6243 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6244 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6245 
6246 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6247 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6248 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6249 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6250 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6251 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6252 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6253 
6254 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6255 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6256 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6257 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6258 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6259 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6260 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6261 
6262 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6263 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6264 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6265 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6266 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6267 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6268 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6269 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6270 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6271 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6272 
6273 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6274 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6275 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6276 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6277 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6278 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6279 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6280 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6281 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6282 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6283 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6284 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6285 
6286 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6287 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6288 				asid);
6289 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6290 				asid);
6291 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6292 				asid);
6293 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6294 				asid);
6295 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6296 				asid);
6297 	}
6298 
6299 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6300 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6301 				asid);
6302 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6303 				asid);
6304 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6305 				asid);
6306 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6307 				asid);
6308 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6309 				asid);
6310 	}
6311 
6312 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6313 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6314 				asid);
6315 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6316 				asid);
6317 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6318 				asid);
6319 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6320 				asid);
6321 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6322 				asid);
6323 	}
6324 
6325 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6326 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6327 				asid);
6328 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6329 				asid);
6330 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6331 				asid);
6332 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6333 				asid);
6334 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6335 				asid);
6336 	}
6337 
6338 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6339 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6340 				asid);
6341 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6342 				asid);
6343 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6344 				asid);
6345 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6346 				asid);
6347 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6348 				asid);
6349 	}
6350 
6351 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6352 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6353 				asid);
6354 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6355 				asid);
6356 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6357 				asid);
6358 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6359 				asid);
6360 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6361 				asid);
6362 	}
6363 
6364 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6365 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6366 				asid);
6367 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6368 				asid);
6369 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6370 				asid);
6371 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6372 				asid);
6373 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6374 				asid);
6375 	}
6376 
6377 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6378 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6379 				asid);
6380 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6381 				asid);
6382 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6383 				asid);
6384 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6385 				asid);
6386 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6387 				asid);
6388 	}
6389 
6390 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6391 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6392 				asid);
6393 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6394 				asid);
6395 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6396 				asid);
6397 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6398 				asid);
6399 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6400 				asid);
6401 	}
6402 
6403 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6404 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6405 				asid);
6406 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6407 				asid);
6408 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6409 				asid);
6410 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6411 				asid);
6412 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6413 				asid);
6414 	}
6415 
6416 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6417 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6418 }
6419 
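/*
 * gaudi_send_job_on_qman0() - submit a driver-internal job on the PCI DMA
 * queue. The job's CB is expected to end with a MSG_PROT fence packet, which
 * is filled in here; the DMA core protection bits are set for the duration of
 * the job and the fence value is polled to detect completion.
 */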
6420 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6421 		struct hl_cs_job *job)
6422 {
6423 	struct packet_msg_prot *fence_pkt;
6424 	u32 *fence_ptr;
6425 	dma_addr_t fence_dma_addr;
6426 	struct hl_cb *cb;
6427 	u32 tmp, timeout, dma_offset;
6428 	int rc;
6429 
6430 	if (hdev->pldm)
6431 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6432 	else
6433 		timeout = HL_DEVICE_TIMEOUT_USEC;
6434 
6435 	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6436 		dev_err_ratelimited(hdev->dev,
6437 			"Can't send driver job on QMAN0 because the device is not idle\n");
6438 		return -EBUSY;
6439 	}
6440 
6441 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6442 	if (!fence_ptr) {
6443 		dev_err(hdev->dev,
6444 			"Failed to allocate fence memory for QMAN0\n");
6445 		return -ENOMEM;
6446 	}
6447 
6448 	cb = job->patched_cb;
6449 
6450 	fence_pkt = cb->kernel_address +
6451 			job->job_cb_size - sizeof(struct packet_msg_prot);
6452 
6453 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6454 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6455 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6456 
6457 	fence_pkt->ctl = cpu_to_le32(tmp);
6458 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6459 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6460 
6461 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6462 
6463 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6464 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6465 
6466 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6467 					job->job_cb_size, cb->bus_address);
6468 	if (rc) {
6469 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6470 		goto free_fence_ptr;
6471 	}
6472 
6473 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6474 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6475 				timeout, true);
6476 
6477 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6478 
6479 	if (rc == -ETIMEDOUT) {
6480 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6481 		goto free_fence_ptr;
6482 	}
6483 
6484 free_fence_ptr:
6485 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6486 
6487 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6488 	return rc;
6489 }
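
/*
 * A rough sketch of the flow above (not an authoritative description): the
 * last packet of the patched CB is overwritten with a MSG_PROT packet whose
 * payload is GAUDI_QMAN0_FENCE_VAL and whose target address is a small DMA
 * pool buffer. The CB is then submitted on the DMA0 queue while the DMA core
 * protection bits are temporarily set, and completion is detected by polling
 * that buffer until the fence value appears or the timeout expires.
 */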
6490 
6491 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6492 {
6493 	if (event_type >= GAUDI_EVENT_SIZE)
6494 		goto event_not_supported;
6495 
6496 	if (!gaudi_irq_map_table[event_type].valid)
6497 		goto event_not_supported;
6498 
6499 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6500 
6501 	return;
6502 
6503 event_not_supported:
6504 	snprintf(desc, size, "N/A");
6505 }
6506 
6507 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6508 							bool is_write, u16 *engine_id_1,
6509 							u16 *engine_id_2)
6510 {
6511 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6512 
6513 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6514 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6515 
6516 	switch (x_y) {
6517 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6518 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6519 		dma_id[0] = 0;
6520 		dma_id[1] = 2;
6521 		break;
6522 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6523 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6524 		dma_id[0] = 1;
6525 		dma_id[1] = 3;
6526 		break;
6527 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6528 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6529 		dma_id[0] = 4;
6530 		dma_id[1] = 6;
6531 		break;
6532 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6533 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6534 		dma_id[0] = 5;
6535 		dma_id[1] = 7;
6536 		break;
6537 	default:
6538 		goto unknown_initiator;
6539 	}
6540 
6541 	for (i = 0 ; i < 2 ; i++) {
6542 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6543 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6544 	}
6545 
6546 	switch (x_y) {
6547 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6548 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6549 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6550 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6551 			return "DMA0";
6552 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6553 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6554 			return "DMA2";
6555 		} else {
6556 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6557 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6558 			return "DMA0 or DMA2";
6559 		}
6560 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6561 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6562 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6563 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6564 			return "DMA1";
6565 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6566 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6567 			return "DMA3";
6568 		} else {
6569 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6570 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6571 			return "DMA1 or DMA3";
6572 		}
6573 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6574 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6575 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6576 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6577 			return "DMA4";
6578 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6579 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6580 			return "DMA6";
6581 		} else {
6582 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6583 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6584 			return "DMA4 or DMA6";
6585 		}
6586 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6587 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6588 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6589 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6590 			return "DMA5";
6591 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6592 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6593 			return "DMA7";
6594 		} else {
6595 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6596 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6597 			return "DMA5 or DMA7";
6598 		}
6599 	}
6600 
6601 unknown_initiator:
6602 	return "unknown initiator";
6603 }
6604 
6605 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6606 							u16 *engine_id_1, u16 *engine_id_2)
6607 {
6608 	u32 val, x_y, axi_id;
6609 
6610 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6611 				RREG32(mmMMU_UP_RAZWI_READ_ID);
6612 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6613 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6614 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6615 			RAZWI_INITIATOR_AXI_ID_SHIFT);
6616 
6617 	switch (x_y) {
6618 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6619 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6620 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6621 			return "TPC0";
6622 		}
6623 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6624 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6625 			return "NIC0";
6626 		}
6627 		break;
6628 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6629 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6630 		return "TPC1";
6631 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6632 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6633 		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6634 		return "MME0";
6635 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6636 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6637 		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6638 		return "MME1";
6639 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6640 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6641 		return "TPC2";
6642 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6643 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6644 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6645 			return "TPC3";
6646 		}
6647 		/* PCI, CPU or PSOC does not have an engine id */
6648 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6649 			return "PCI";
6650 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6651 			return "CPU";
6652 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6653 			return "PSOC";
6654 		break;
6655 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6656 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6657 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6658 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6659 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6660 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6661 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6662 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6663 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6664 				engine_id_1, engine_id_2);
6665 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6666 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6667 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6668 			return "TPC4";
6669 		}
6670 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6671 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6672 			return "NIC1";
6673 		}
6674 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6675 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6676 			return "NIC2";
6677 		}
6678 		break;
6679 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6680 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6681 		return "TPC5";
6682 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6683 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6684 		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6685 		return "MME2";
6686 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6687 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6688 		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6689 		return "MME3";
6690 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6691 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6692 		return "TPC6";
6693 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6694 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6695 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6696 			return "TPC7";
6697 		}
6698 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6699 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6700 			return "NIC4";
6701 		}
6702 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6703 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6704 			return "NIC5";
6705 		}
6706 		break;
6707 	default:
6708 		break;
6709 	}
6710 
6711 	dev_err(hdev->dev,
6712 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6713 		val,
6714 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6715 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6716 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6717 			RAZWI_INITIATOR_AXI_ID_MASK);
6718 
6719 	return "unknown initiator";
6720 }
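
/*
 * Illustrative note on the decode above (field names taken from the code; the
 * exact bit layout is defined by the RAZWI_INITIATOR_* masks and shifts):
 *
 *   x_y    = val & ((Y_MASK << Y_SHIFT) | (X_MASK << X_SHIFT))
 *   axi_id = val & (AXI_ID_MASK << AXI_ID_SHIFT)
 *
 * x_y selects the grid location of the initiator, and axi_id disambiguates
 * between engines that share a location (e.g. TPC0 vs NIC0 above).
 */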
6721 
6722 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6723 						u16 *engine_id_2, bool *is_read, bool *is_write)
6724 {
6725 
6726 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6727 		dev_err_ratelimited(hdev->dev,
6728 			"RAZWI event caused by illegal write of %s\n",
6729 			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6730 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6731 		*is_write = true;
6732 	}
6733 
6734 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6735 		dev_err_ratelimited(hdev->dev,
6736 			"RAZWI event caused by illegal read of %s\n",
6737 			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6738 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6739 		*is_read = true;
6740 	}
6741 }
6742 
6743 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6744 {
6745 	struct gaudi_device *gaudi = hdev->asic_specific;
6746 	u32 val;
6747 
6748 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6749 		return;
6750 
6751 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6752 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6753 		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6754 		*addr <<= 32;
6755 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6756 
6757 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6758 		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6759 
6760 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6761 	}
6762 
6763 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6764 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6765 		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6766 		*addr <<= 32;
6767 		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6768 
6769 		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6770 
6771 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6772 	}
6773 }
6774 
6775 /*
6776  *  +-------------------+------------------------------------------------------+
6777  *  | Configuration Reg |                     Description                      |
6778  *  |      Address      |                                                      |
6779  *  +-------------------+------------------------------------------------------+
6780  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6781  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6782  *  |                   |0xF34 memory wrappers 63:32                           |
6783  *  |                   |0xF38 memory wrappers 95:64                           |
6784  *  |                   |0xF3C memory wrappers 127:96                          |
6785  *  +-------------------+------------------------------------------------------+
6786  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6787  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6788  *  |                   |0xF44 memory wrappers 63:32                           |
6789  *  |                   |0xF48 memory wrappers 95:64                           |
6790  *  |                   |0xF4C memory wrappers 127:96                          |
6791  *  +-------------------+------------------------------------------------------+
6792  */
6793 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6794 		struct ecc_info_extract_params *params, u64 *ecc_address,
6795 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6796 {
6797 	u32 i, num_mem_regs, reg, err_bit;
6798 	u64 err_addr, err_word = 0;
6799 
6800 	num_mem_regs = params->num_memories / 32 +
6801 			((params->num_memories % 32) ? 1 : 0);
6802 
6803 	if (params->block_address >= CFG_BASE)
6804 		params->block_address -= CFG_BASE;
6805 
6806 	if (params->derr)
6807 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6808 	else
6809 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6810 
6811 	/* Set invalid wrapper index */
6812 	*memory_wrapper_idx = 0xFF;
6813 
6814 	/* Iterate through memory wrappers, a single bit must be set */
6815 	for (i = 0 ; i < num_mem_regs ; i++) {
6816 		/* the indication registers are consecutive, 4 bytes apart */
6817 		err_word = RREG32(err_addr + (i * 4));
6818 		if (err_word) {
6819 			err_bit = __ffs(err_word);
6820 			*memory_wrapper_idx = err_bit + (32 * i);
6821 			break;
6822 		}
6823 	}
6824 
6825 	if (*memory_wrapper_idx == 0xFF) {
6826 		dev_err(hdev->dev, "ECC error information cannot be found\n");
6827 		return -EINVAL;
6828 	}
6829 
6830 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6831 			*memory_wrapper_idx);
6832 
6833 	*ecc_address =
6834 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6835 	*ecc_syndrom =
6836 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6837 
6838 	/* Clear error indication */
6839 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6840 	if (params->derr)
6841 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6842 	else
6843 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6844 
6845 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6846 
6847 	return 0;
6848 }
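
/*
 * Illustrative sketch of how a wrapper index relates to the indication
 * registers in the table above, assuming GAUDI_ECC_SERR0_OFFSET corresponds
 * to the 0xF30 bank (one bit per wrapper, 32 wrappers per register):
 *
 *   wrapper_idx = 70  ->  register index 70 / 32 = 2 (offset 0xF30 + 2 * 4),
 *                         bit position   70 % 32 = 6
 *
 * which is the inverse of the extraction above, where the set bit plus
 * (32 * register index) reconstructs the wrapper index.
 */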
6849 
6850 /*
6851  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6852  *
6853  * @idx: the current pi/ci value
6854  * @q_len: the queue length (power of 2)
6855  *
6856  * @return the cyclically decremented index
6857  */
6858 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6859 {
6860 	u32 mask = q_len - 1;
6861 
6862 	/*
6863 	 * A modular decrement is equivalent to adding (q_len - 1);
6864 	 * we then mask with (q_len - 1) to keep the value in the
6865 	 * range [0, q_len - 1]
6866 	 */
6867 	return (idx + q_len - 1) & mask;
6868 }
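
/*
 * Illustrative sketch of the wrap-around behavior above, assuming q_len is a
 * power of 2 as documented:
 *
 *   q_len = 8, mask = 7
 *   idx = 5  ->  (5 + 7) & 7 = 4   (plain decrement)
 *   idx = 0  ->  (0 + 7) & 7 = 7   (wraps to the last entry)
 */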
6869 
6870 /**
6871  * gaudi_handle_sw_config_stream_data - print SW config stream data
6872  *
6873  * @hdev: pointer to the habanalabs device structure
6874  * @stream: the QMAN's stream
6875  * @qman_base: base address of QMAN registers block
6876  * @event_mask: mask of the last events occurred
6877  * @event_mask: mask of the last events that occurred
6878 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6879 						u64 qman_base, u64 event_mask)
6880 {
6881 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6882 	u32 cq_ptr_lo_off, size;
6883 
6884 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6885 
6886 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6887 						stream * cq_ptr_lo_off;
6888 	cq_ptr_hi = cq_ptr_lo +
6889 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6890 	cq_tsize = cq_ptr_lo +
6891 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6892 
6893 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6894 	size = RREG32(cq_tsize);
6895 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6896 							stream, cq_ptr, size);
6897 
6898 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6899 		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6900 		hdev->captured_err_info.undef_opcode.cq_size = size;
6901 		hdev->captured_err_info.undef_opcode.stream_id = stream;
6902 	}
6903 }
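
/*
 * Illustrative sketch of the per-stream address arithmetic above: each stream
 * register is base + fixed offset + stream * stride, e.g. for stream 2:
 *
 *   cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
 *               2 * (mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0)
 *
 * The TPC0 register names are only used to derive offsets, which implies the
 * QM block layout is the same across the different engines.
 */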
6904 
6905 /**
6906  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6907  *
6908  * @hdev: pointer to the habanalabs device structure
6909  * @qid_base: first QID of the QMAN (out of 4 streams)
6910  * @stream: the QMAN's stream
6911  * @qman_base: base address of QMAN registers block
6912  * @event_mask: mask of the last events occurred
6913  * @event_mask: mask of the last events that occurred
6914  */
6915 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6916 						u32 stream, u64 qman_base,
6917 						u64 event_mask,
6918 						bool pr_sw_conf)
6919 {
6920 	u32 ci, qm_ci_stream_off, queue_len;
6921 	struct hl_hw_queue *q;
6922 	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6923 	int i;
6924 
6925 	q = &hdev->kernel_queues[qid_base + stream];
6926 
6927 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6928 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6929 						stream * qm_ci_stream_off;
6930 
6931 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6932 					q->int_queue_len : HL_QUEUE_LENGTH;
6933 
6934 	hdev->asic_funcs->hw_queues_lock(hdev);
6935 
6936 	if (pr_sw_conf)
6937 		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6938 
6939 	ci = RREG32(pq_ci);
6940 
6941 	/* we should start printing from ci - 1 */
6942 	ci = gaudi_queue_idx_dec(ci, queue_len);
6943 	memset(addr, 0, sizeof(addr));
6944 
6945 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6946 		struct hl_bd *bd;
6947 		u32 len;
6948 
6949 		bd = q->kernel_address;
6950 		bd += ci;
6951 
6952 		len = le32_to_cpu(bd->len);
6953 		/* len 0 means uninitialized entry - break */
6954 		if (!len)
6955 			break;
6956 
6957 		addr[i] = le64_to_cpu(bd->ptr);
6958 
6959 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6960 							stream, ci, addr[i], len);
6961 
6962 		/* get previous ci, wrap if needed */
6963 		ci = gaudi_queue_idx_dec(ci, queue_len);
6964 	}
6965 
6966 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6967 		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6968 		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6969 
6970 		if (arr_idx == 0) {
6971 			undef_opcode->timestamp = ktime_get();
6972 			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6973 		}
6974 
6975 		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6976 		undef_opcode->cb_addr_streams_len++;
6977 	}
6978 
6979 	hdev->asic_funcs->hw_queues_unlock(hdev);
6980 }
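
/*
 * Illustrative sketch of the walk above: with queue_len = 8 and a hardware
 * ci of 2, the loop visits BD indices 1, 0, 7, 6, ... (most recent first),
 * stopping early at the first zero-length (uninitialized) entry, and records
 * up to PQ_FETCHER_CACHE_SIZE addresses.
 */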
6981 
6982 /**
6983  * handle_qman_data_on_err - extract QMAN data on error
6984  *
6985  * @hdev: pointer to the habanalabs device structure
6986  * @qid_base: first QID of the QMAN (out of 4 streams)
6987  * @stream: the QMAN's stream
6988  * @qman_base: base address of QMAN registers block
6989  * @event_mask: mask of the last events occurred
6990  * @event_mask: mask of the last events that occurred
6991  * This function attempts to extract as much data as possible on a QMAN error.
6992  * For an upper CP, print the SW config stream data and the last 8 PQEs.
6993  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
6994  */
6995 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6996 				   u32 stream, u64 qman_base, u64 event_mask)
6997 {
6998 	u32 i;
6999 
7000 	if (stream != QMAN_STREAMS) {
7001 		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
7002 			qman_base, event_mask, true);
7003 		return;
7004 	}
7005 
7006 	/* handle Lower-CP */
7007 	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
7008 
7009 	for (i = 0; i < QMAN_STREAMS; i++)
7010 		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
7011 			qman_base, event_mask, false);
7012 }
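
/*
 * Note on the stream argument above (as used by the caller below): values
 * 0..QMAN_STREAMS-1 denote the upper CPs, while the value QMAN_STREAMS itself
 * denotes the lower CP, which is why the lower-CP path dumps the last PQEs of
 * all four upper streams rather than a single one.
 */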
7013 
7014 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7015 					  const char *qm_name,
7016 					  u64 qman_base,
7017 					  u32 qid_base,
7018 					  u64 *event_mask)
7019 {
7020 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7021 	u64 glbl_sts_addr, arb_err_addr;
7022 	char reg_desc[32];
7023 
7024 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7025 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7026 
7027 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
7028 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7029 		glbl_sts_clr_val = 0;
7030 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7031 
7032 		if (!glbl_sts_val)
7033 			continue;
7034 
7035 		if (i == QMAN_STREAMS)
7036 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7037 		else
7038 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7039 
7040 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7041 			if (glbl_sts_val & BIT(j)) {
7042 				dev_err_ratelimited(hdev->dev,
7043 						"%s %s. err cause: %s\n",
7044 						qm_name, reg_desc,
7045 						gaudi_qman_error_cause[j]);
7046 				glbl_sts_clr_val |= BIT(j);
7047 			}
7048 		}
7049 		/* check for undefined opcode */
7050 		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
7051 				hdev->captured_err_info.undef_opcode.write_enable) {
7052 			memset(&hdev->captured_err_info.undef_opcode, 0,
7053 						sizeof(hdev->captured_err_info.undef_opcode));
7054 
7055 			hdev->captured_err_info.undef_opcode.write_enable = false;
7056 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7057 		}
7058 
7059 		/* Write 1 to clear errors */
7060 		if (!hdev->stop_on_err)
7061 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7062 		else
7063 			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
7064 	}
7065 
7066 	arb_err_val = RREG32(arb_err_addr);
7067 
7068 	if (!arb_err_val)
7069 		return;
7070 
7071 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7072 		if (arb_err_val & BIT(j)) {
7073 			dev_err_ratelimited(hdev->dev,
7074 					"%s ARB_ERR. err cause: %s\n",
7075 					qm_name,
7076 					gaudi_qman_arb_error_cause[j]);
7077 		}
7078 	}
7079 }
7080 
7081 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7082 		struct hl_eq_sm_sei_data *sei_data)
7083 {
7084 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7085 
7086 	/* Flip the bits as the enum is ordered in the opposite way */
7087 	index = (index ^ 0x3) & 0x3;
7088 
7089 	switch (sei_data->sei_cause) {
7090 	case SM_SEI_SO_OVERFLOW:
7091 		dev_err_ratelimited(hdev->dev,
7092 			"%s SEI Error: SOB Group %u overflow/underflow",
7093 			gaudi_sync_manager_names[index],
7094 			le32_to_cpu(sei_data->sei_log));
7095 		break;
7096 	case SM_SEI_LBW_4B_UNALIGNED:
7097 		dev_err_ratelimited(hdev->dev,
7098 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7099 			gaudi_sync_manager_names[index],
7100 			le32_to_cpu(sei_data->sei_log));
7101 		break;
7102 	case SM_SEI_AXI_RESPONSE_ERR:
7103 		dev_err_ratelimited(hdev->dev,
7104 			"%s SEI Error: AXI ID %u response error",
7105 			gaudi_sync_manager_names[index],
7106 			le32_to_cpu(sei_data->sei_log));
7107 		break;
7108 	default:
7109 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7110 				le32_to_cpu(sei_data->sei_log));
7111 		break;
7112 	}
7113 }
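
/*
 * Illustrative sketch of the "(index ^ 0x3) & 0x3" flip above, which reverses
 * the 0..3 ordering before indexing gaudi_sync_manager_names:
 *
 *   event index 0 -> name entry 3
 *   event index 1 -> name entry 2
 *   event index 2 -> name entry 1
 *   event index 3 -> name entry 0
 */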
7114 
7115 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7116 		struct hl_eq_ecc_data *ecc_data)
7117 {
7118 	struct ecc_info_extract_params params;
7119 	u64 ecc_address = 0, ecc_syndrom = 0;
7120 	u8 index, memory_wrapper_idx = 0;
7121 	bool extract_info_from_fw;
7122 	int rc;
7123 
7124 	if (hdev->asic_prop.fw_security_enabled) {
7125 		extract_info_from_fw = true;
7126 		goto extract_ecc_info;
7127 	}
7128 
7129 	switch (event_type) {
7130 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7131 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7132 		extract_info_from_fw = true;
7133 		break;
7134 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7135 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7136 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7137 		params.num_memories = 90;
7138 		params.derr = false;
7139 		extract_info_from_fw = false;
7140 		break;
7141 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7142 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7143 		params.block_address =
7144 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7145 		params.num_memories = 90;
7146 		params.derr = true;
7147 		extract_info_from_fw = false;
7148 		break;
7149 	case GAUDI_EVENT_MME0_ACC_SERR:
7150 	case GAUDI_EVENT_MME1_ACC_SERR:
7151 	case GAUDI_EVENT_MME2_ACC_SERR:
7152 	case GAUDI_EVENT_MME3_ACC_SERR:
7153 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7154 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7155 		params.num_memories = 128;
7156 		params.derr = false;
7157 		extract_info_from_fw = false;
7158 		break;
7159 	case GAUDI_EVENT_MME0_ACC_DERR:
7160 	case GAUDI_EVENT_MME1_ACC_DERR:
7161 	case GAUDI_EVENT_MME2_ACC_DERR:
7162 	case GAUDI_EVENT_MME3_ACC_DERR:
7163 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7164 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7165 		params.num_memories = 128;
7166 		params.derr = true;
7167 		extract_info_from_fw = false;
7168 		break;
7169 	case GAUDI_EVENT_MME0_SBAB_SERR:
7170 	case GAUDI_EVENT_MME1_SBAB_SERR:
7171 	case GAUDI_EVENT_MME2_SBAB_SERR:
7172 	case GAUDI_EVENT_MME3_SBAB_SERR:
7173 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7174 		params.block_address =
7175 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7176 		params.num_memories = 33;
7177 		params.derr = false;
7178 		extract_info_from_fw = false;
7179 		break;
7180 	case GAUDI_EVENT_MME0_SBAB_DERR:
7181 	case GAUDI_EVENT_MME1_SBAB_DERR:
7182 	case GAUDI_EVENT_MME2_SBAB_DERR:
7183 	case GAUDI_EVENT_MME3_SBAB_DERR:
7184 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7185 		params.block_address =
7186 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7187 		params.num_memories = 33;
7188 		params.derr = true;
7189 		extract_info_from_fw = false;
7190 		break;
7191 	default:
7192 		return;
7193 	}
7194 
7195 extract_ecc_info:
7196 	if (extract_info_from_fw) {
7197 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7198 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7199 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7200 	} else {
7201 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7202 				&ecc_syndrom, &memory_wrapper_idx);
7203 		if (rc)
7204 			return;
7205 	}
7206 
7207 	dev_err(hdev->dev,
7208 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7209 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7210 }
7211 
7212 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7213 {
7214 	u64 qman_base;
7215 	char desc[32];
7216 	u32 qid_base;
7217 	u8 index;
7218 
7219 	switch (event_type) {
7220 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7221 		index = event_type - GAUDI_EVENT_TPC0_QM;
7222 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7223 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7224 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7225 		break;
7226 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7227 		if (event_type == GAUDI_EVENT_MME0_QM) {
7228 			index = 0;
7229 			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7230 		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7231 			index = 2;
7232 			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7233 		}
7234 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7235 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7236 		break;
7237 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7238 		index = event_type - GAUDI_EVENT_DMA0_QM;
7239 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7240 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7241 		if (index > 1)
7242 			qid_base++;
7243 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7244 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7245 		break;
7246 	case GAUDI_EVENT_NIC0_QM0:
7247 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7248 		qman_base = mmNIC0_QM0_BASE;
7249 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7250 		break;
7251 	case GAUDI_EVENT_NIC0_QM1:
7252 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7253 		qman_base = mmNIC0_QM1_BASE;
7254 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7255 		break;
7256 	case GAUDI_EVENT_NIC1_QM0:
7257 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7258 		qman_base = mmNIC1_QM0_BASE;
7259 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7260 		break;
7261 	case GAUDI_EVENT_NIC1_QM1:
7262 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7263 		qman_base = mmNIC1_QM1_BASE;
7264 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7265 		break;
7266 	case GAUDI_EVENT_NIC2_QM0:
7267 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7268 		qman_base = mmNIC2_QM0_BASE;
7269 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7270 		break;
7271 	case GAUDI_EVENT_NIC2_QM1:
7272 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7273 		qman_base = mmNIC2_QM1_BASE;
7274 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7275 		break;
7276 	case GAUDI_EVENT_NIC3_QM0:
7277 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7278 		qman_base = mmNIC3_QM0_BASE;
7279 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7280 		break;
7281 	case GAUDI_EVENT_NIC3_QM1:
7282 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7283 		qman_base = mmNIC3_QM1_BASE;
7284 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7285 		break;
7286 	case GAUDI_EVENT_NIC4_QM0:
7287 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7288 		qman_base = mmNIC4_QM0_BASE;
7289 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7290 		break;
7291 	case GAUDI_EVENT_NIC4_QM1:
7292 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7293 		qman_base = mmNIC4_QM1_BASE;
7294 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7295 		break;
7296 	default:
7297 		return;
7298 	}
7299 
7300 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7301 }
7302 
7303 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7304 					bool razwi, u64 *event_mask)
7305 {
7306 	bool is_read = false, is_write = false;
7307 	u16 engine_id[2], num_of_razwi_eng = 0;
7308 	char desc[64] = "";
7309 	u64 razwi_addr = 0;
7310 	u8 razwi_flags = 0;
7311 
7312 	/*
7313 	 * Init the engine ids as not valid by default; they get a valid value
7314 	 * only if the razwi was initiated by an engine that has an engine id.
7315 	 */
7316 	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7317 	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7318 
7319 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7320 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7321 		event_type, desc);
7322 
7323 	if (razwi) {
7324 		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7325 						&is_write);
7326 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7327 
7328 		if (is_read)
7329 			razwi_flags |= HL_RAZWI_READ;
7330 		if (is_write)
7331 			razwi_flags |= HL_RAZWI_WRITE;
7332 
7333 		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7334 			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7335 				num_of_razwi_eng = 2;
7336 			else
7337 				num_of_razwi_eng = 1;
7338 		}
7339 
7340 		hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags,
7341 				event_mask);
7342 	}
7343 }
7344 
7345 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7346 					struct cpucp_pkt_sync_err *sync_err)
7347 {
7348 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7349 
7350 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7351 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7352 }
7353 
7354 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7355 					struct hl_eq_fw_alive *fw_alive)
7356 {
7357 	dev_err(hdev->dev,
7358 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7359 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7360 		le32_to_cpu(fw_alive->process_id),
7361 		le32_to_cpu(fw_alive->thread_id),
7362 		le64_to_cpu(fw_alive->uptime_seconds));
7363 }
7364 
7365 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7366 						void *data)
7367 {
7368 	char desc[64] = "", *type;
7369 	struct eq_nic_sei_event *eq_nic_sei = data;
7370 	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7371 
7372 	switch (eq_nic_sei->axi_error_cause) {
7373 	case RXB:
7374 		type = "RXB";
7375 		break;
7376 	case RXE:
7377 		type = "RXE";
7378 		break;
7379 	case TXS:
7380 		type = "TXS";
7381 		break;
7382 	case TXE:
7383 		type = "TXE";
7384 		break;
7385 	case QPC_RESP:
7386 		type = "QPC_RESP";
7387 		break;
7388 	case NON_AXI_ERR:
7389 		type = "NON_AXI_ERR";
7390 		break;
7391 	case TMR:
7392 		type = "TMR";
7393 		break;
7394 	default:
7395 		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7396 			eq_nic_sei->axi_error_cause);
7397 		type = "N/A";
7398 		break;
7399 	}
7400 
7401 	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7402 			eq_nic_sei->id);
7403 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7404 		event_type, desc);
7405 }
7406 
7407 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7408 {
7409 	/* GAUDI doesn't support any reset except hard-reset */
7410 	return -EPERM;
7411 }
7412 
7413 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7414 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7415 {
7416 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7417 	int rc = 0;
7418 
7419 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7420 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7421 		if (!hbm_ecc_data) {
7422 			dev_err(hdev->dev, "No FW ECC data");
7423 			return 0;
7424 		}
7425 
7426 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7427 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7428 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7429 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7430 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7431 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7432 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7433 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7434 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7435 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7436 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7437 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7438 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7439 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7440 
7441 		dev_err(hdev->dev,
7442 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7443 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7444 		dev_err(hdev->dev,
7445 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7446 			device, ch, hbm_ecc_data->first_addr, type,
7447 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7448 			hbm_ecc_data->dec_cnt);
7449 		return 0;
7450 	}
7451 
7452 	if (hdev->asic_prop.fw_security_enabled) {
7453 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7454 		return 0;
7455 	}
7456 
7457 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7458 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7459 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7460 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7461 		if (val) {
7462 			rc = -EIO;
7463 			dev_err(hdev->dev,
7464 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7465 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7466 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7467 				(val >> 4) & 0x1);
7468 
7469 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7470 			dev_err(hdev->dev,
7471 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7472 				device, ch * 2,
7473 				RREG32(base + ch * 0x1000 + 0x064),
7474 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7475 				(val2 & 0xFF0000) >> 16,
7476 				(val2 & 0xFF000000) >> 24);
7477 		}
7478 
7479 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7480 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7481 		if (val) {
7482 			rc = -EIO;
7483 			dev_err(hdev->dev,
7484 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7485 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7486 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7487 				(val >> 4) & 0x1);
7488 
7489 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7490 			dev_err(hdev->dev,
7491 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7492 				device, ch * 2 + 1,
7493 				RREG32(base + ch * 0x1000 + 0x074),
7494 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7495 				(val2 & 0xFF0000) >> 16,
7496 				(val2 & 0xFF000000) >> 24);
7497 		}
7498 
7499 		/* Clear interrupts */
7500 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7501 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7502 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7503 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7504 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7505 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7506 	}
7507 
7508 	val  = RREG32(base + 0x8F30);
7509 	val2 = RREG32(base + 0x8F34);
7510 	if (val | val2) {
7511 		rc = -EIO;
7512 		dev_err(hdev->dev,
7513 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7514 			device, val, val2);
7515 	}
7516 	val  = RREG32(base + 0x8F40);
7517 	val2 = RREG32(base + 0x8F44);
7518 	if (val | val2) {
7519 		rc = -EIO;
7520 		dev_err(hdev->dev,
7521 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7522 			device, val, val2);
7523 	}
7524 
7525 	return rc;
7526 }
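
/*
 * Note on the decode above: the masked interrupt word is folded with
 * "(val & 0xFF) | ((val >> 8) & 0xFF)" so that a cause bit pending in either
 * byte is reported, with bits 0..4 of the result then printed as WR_PAR,
 * RD_PAR, CA_PAR, SERR and DERR respectively.
 */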
7527 
7528 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7529 {
7530 	switch (hbm_event_type) {
7531 	case GAUDI_EVENT_HBM0_SPI_0:
7532 	case GAUDI_EVENT_HBM0_SPI_1:
7533 		return 0;
7534 	case GAUDI_EVENT_HBM1_SPI_0:
7535 	case GAUDI_EVENT_HBM1_SPI_1:
7536 		return 1;
7537 	case GAUDI_EVENT_HBM2_SPI_0:
7538 	case GAUDI_EVENT_HBM2_SPI_1:
7539 		return 2;
7540 	case GAUDI_EVENT_HBM3_SPI_0:
7541 	case GAUDI_EVENT_HBM3_SPI_1:
7542 		return 3;
7543 	default:
7544 		break;
7545 	}
7546 
7547 	/* Should never happen */
7548 	return 0;
7549 }
7550 
7551 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7552 					char *interrupt_name)
7553 {
7554 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7555 	bool soft_reset_required = false;
7556 
7557 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7558 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7559 
7560 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7561 		if (tpc_interrupts_cause & BIT(i)) {
7562 			dev_err_ratelimited(hdev->dev,
7563 					"TPC%d_%s interrupt cause: %s\n",
7564 					tpc_id, interrupt_name,
7565 					gaudi_tpc_interrupts_cause[i]);
7566 			/* If this is QM error, we need to soft-reset */
7567 			if (i == 15)
7568 				soft_reset_required = true;
7569 		}
7570 
7571 	/* Clear interrupts */
7572 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7573 
7574 	return soft_reset_required;
7575 }
7576 
7577 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7578 {
7579 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7580 }
7581 
7582 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7583 {
7584 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7585 }
7586 
7587 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7588 {
7589 	ktime_t zero_time = ktime_set(0, 0);
7590 
7591 	mutex_lock(&hdev->clk_throttling.lock);
7592 
7593 	switch (event_type) {
7594 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7595 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7596 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7597 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7598 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7599 		dev_info_ratelimited(hdev->dev,
7600 			"Clock throttling due to power consumption\n");
7601 		break;
7602 
7603 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7604 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7605 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7606 		dev_info_ratelimited(hdev->dev,
7607 			"Power envelope is safe, back to optimal clock\n");
7608 		break;
7609 
7610 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7611 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7612 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7613 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7614 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7615 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7616 		dev_info_ratelimited(hdev->dev,
7617 			"Clock throttling due to overheating\n");
7618 		break;
7619 
7620 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7621 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7622 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7623 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7624 		dev_info_ratelimited(hdev->dev,
7625 			"Thermal envelope is safe, back to optimal clock\n");
7626 		break;
7627 
7628 	default:
7629 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7630 			event_type);
7631 		break;
7632 	}
7633 
7634 	mutex_unlock(&hdev->clk_throttling.lock);
7635 }
7636 
7637 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7638 {
7639 	struct gaudi_device *gaudi = hdev->asic_specific;
7640 	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7641 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7642 	u32 fw_fatal_err_flag = 0, flags = 0;
7643 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7644 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7645 	bool reset_required, reset_direct = false;
7646 	u8 cause;
7647 	int rc;
7648 
7649 	if (event_type >= GAUDI_EVENT_SIZE) {
7650 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7651 				event_type, GAUDI_EVENT_SIZE - 1);
7652 		return;
7653 	}
7654 
7655 	gaudi->events_stat[event_type]++;
7656 	gaudi->events_stat_aggregate[event_type]++;
7657 
7658 	switch (event_type) {
7659 	case GAUDI_EVENT_PCIE_CORE_DERR:
7660 	case GAUDI_EVENT_PCIE_IF_DERR:
7661 	case GAUDI_EVENT_PCIE_PHY_DERR:
7662 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7663 	case GAUDI_EVENT_MME0_ACC_DERR:
7664 	case GAUDI_EVENT_MME0_SBAB_DERR:
7665 	case GAUDI_EVENT_MME1_ACC_DERR:
7666 	case GAUDI_EVENT_MME1_SBAB_DERR:
7667 	case GAUDI_EVENT_MME2_ACC_DERR:
7668 	case GAUDI_EVENT_MME2_SBAB_DERR:
7669 	case GAUDI_EVENT_MME3_ACC_DERR:
7670 	case GAUDI_EVENT_MME3_SBAB_DERR:
7671 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7672 		fallthrough;
7673 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7674 	case GAUDI_EVENT_PSOC_MEM_DERR:
7675 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7676 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7677 	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7678 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7679 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7680 	case GAUDI_EVENT_MMU_DERR:
7681 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7682 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7683 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7684 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7685 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7686 		goto reset_device;
7687 
7688 	case GAUDI_EVENT_GIC500:
7689 	case GAUDI_EVENT_AXI_ECC:
7690 	case GAUDI_EVENT_L2_RAM_ECC:
7691 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7692 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7693 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7694 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7695 		goto reset_device;
7696 
7697 	case GAUDI_EVENT_HBM0_SPI_0:
7698 	case GAUDI_EVENT_HBM1_SPI_0:
7699 	case GAUDI_EVENT_HBM2_SPI_0:
7700 	case GAUDI_EVENT_HBM3_SPI_0:
7701 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7702 		gaudi_hbm_read_interrupts(hdev,
7703 				gaudi_hbm_event_to_dev(event_type),
7704 				&eq_entry->hbm_ecc_data);
7705 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7706 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7707 		goto reset_device;
7708 
7709 	case GAUDI_EVENT_HBM0_SPI_1:
7710 	case GAUDI_EVENT_HBM1_SPI_1:
7711 	case GAUDI_EVENT_HBM2_SPI_1:
7712 	case GAUDI_EVENT_HBM3_SPI_1:
7713 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7714 		gaudi_hbm_read_interrupts(hdev,
7715 				gaudi_hbm_event_to_dev(event_type),
7716 				&eq_entry->hbm_ecc_data);
7717 		hl_fw_unmask_irq(hdev, event_type);
7718 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7719 		break;
7720 
7721 	case GAUDI_EVENT_TPC0_DEC:
7722 	case GAUDI_EVENT_TPC1_DEC:
7723 	case GAUDI_EVENT_TPC2_DEC:
7724 	case GAUDI_EVENT_TPC3_DEC:
7725 	case GAUDI_EVENT_TPC4_DEC:
7726 	case GAUDI_EVENT_TPC5_DEC:
7727 	case GAUDI_EVENT_TPC6_DEC:
7728 	case GAUDI_EVENT_TPC7_DEC:
7729 		/* On a TPC DEC event, notify on a TPC assertion. While there isn't
7730 		 * a specific event for an assertion yet, the FW generates a TPC DEC event.
7731 		 * The SW upper layer will inspect an internal mapped area to determine
7732 		 * whether the event is a TPC assertion or a "real" TPC DEC.
7733 		 */
7734 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7735 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7736 		reset_required = gaudi_tpc_read_interrupts(hdev,
7737 					tpc_dec_event_to_tpc_id(event_type),
7738 					"AXI_SLV_DEC_Error");
7739 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7740 		if (reset_required) {
7741 			dev_err(hdev->dev, "reset required due to %s\n",
7742 				gaudi_irq_map_table[event_type].name);
7743 
7744 			reset_direct = true;
7745 			goto reset_device;
7746 		} else {
7747 			hl_fw_unmask_irq(hdev, event_type);
7748 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7749 		}
7750 		break;
7751 
7752 	case GAUDI_EVENT_TPC0_KRN_ERR:
7753 	case GAUDI_EVENT_TPC1_KRN_ERR:
7754 	case GAUDI_EVENT_TPC2_KRN_ERR:
7755 	case GAUDI_EVENT_TPC3_KRN_ERR:
7756 	case GAUDI_EVENT_TPC4_KRN_ERR:
7757 	case GAUDI_EVENT_TPC5_KRN_ERR:
7758 	case GAUDI_EVENT_TPC6_KRN_ERR:
7759 	case GAUDI_EVENT_TPC7_KRN_ERR:
7760 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7761 		reset_required = gaudi_tpc_read_interrupts(hdev,
7762 					tpc_krn_event_to_tpc_id(event_type),
7763 					"KRN_ERR");
7764 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7765 		if (reset_required) {
7766 			dev_err(hdev->dev, "reset required due to %s\n",
7767 				gaudi_irq_map_table[event_type].name);
7768 
7769 			reset_direct = true;
7770 			goto reset_device;
7771 		} else {
7772 			hl_fw_unmask_irq(hdev, event_type);
7773 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7774 		}
7775 		break;
7776 
7777 	case GAUDI_EVENT_PCIE_CORE_SERR:
7778 	case GAUDI_EVENT_PCIE_IF_SERR:
7779 	case GAUDI_EVENT_PCIE_PHY_SERR:
7780 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7781 	case GAUDI_EVENT_MME0_ACC_SERR:
7782 	case GAUDI_EVENT_MME0_SBAB_SERR:
7783 	case GAUDI_EVENT_MME1_ACC_SERR:
7784 	case GAUDI_EVENT_MME1_SBAB_SERR:
7785 	case GAUDI_EVENT_MME2_ACC_SERR:
7786 	case GAUDI_EVENT_MME2_SBAB_SERR:
7787 	case GAUDI_EVENT_MME3_ACC_SERR:
7788 	case GAUDI_EVENT_MME3_SBAB_SERR:
7789 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7790 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7791 	case GAUDI_EVENT_PSOC_MEM_SERR:
7792 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7793 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7794 	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7795 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7796 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7797 		fallthrough;
7798 	case GAUDI_EVENT_MMU_SERR:
7799 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7800 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7801 		hl_fw_unmask_irq(hdev, event_type);
7802 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7803 		break;
7804 
7805 	case GAUDI_EVENT_PCIE_DEC:
7806 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7807 	case GAUDI_EVENT_PSOC_AXI_DEC:
7808 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7809 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7810 		hl_fw_unmask_irq(hdev, event_type);
7811 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7812 		break;
7813 
7814 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7815 	case GAUDI_EVENT_MMU_WR_PERM:
7816 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7817 		hl_fw_unmask_irq(hdev, event_type);
7818 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7819 		break;
7820 
7821 	case GAUDI_EVENT_MME0_WBC_RSP:
7822 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7823 	case GAUDI_EVENT_MME1_WBC_RSP:
7824 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7825 	case GAUDI_EVENT_MME2_WBC_RSP:
7826 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7827 	case GAUDI_EVENT_MME3_WBC_RSP:
7828 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7829 	case GAUDI_EVENT_RAZWI_OR_ADC:
7830 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7831 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7832 		fallthrough;
7833 	case GAUDI_EVENT_NIC0_QM0:
7834 	case GAUDI_EVENT_NIC0_QM1:
7835 	case GAUDI_EVENT_NIC1_QM0:
7836 	case GAUDI_EVENT_NIC1_QM1:
7837 	case GAUDI_EVENT_NIC2_QM0:
7838 	case GAUDI_EVENT_NIC2_QM1:
7839 	case GAUDI_EVENT_NIC3_QM0:
7840 	case GAUDI_EVENT_NIC3_QM1:
7841 	case GAUDI_EVENT_NIC4_QM0:
7842 	case GAUDI_EVENT_NIC4_QM1:
7843 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7844 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7845 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7846 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7847 		hl_fw_unmask_irq(hdev, event_type);
7848 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7849 		break;
7850 
7851 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7852 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7853 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7854 		goto reset_device;
7855 
7856 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7857 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7858 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7859 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7860 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7861 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7862 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7863 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7864 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7865 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7866 		hl_fw_unmask_irq(hdev, event_type);
7867 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7868 		break;
7869 
7870 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7871 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7872 		hl_fw_unmask_irq(hdev, event_type);
7873 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7874 		break;
7875 
7876 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7877 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7878 		gaudi_print_sm_sei_info(hdev, event_type,
7879 					&eq_entry->sm_sei_data);
7880 		rc = hl_state_dump(hdev);
7881 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7882 		if (rc)
7883 			dev_err(hdev->dev,
7884 				"Error during system state dump %d\n", rc);
7885 		hl_fw_unmask_irq(hdev, event_type);
7886 		break;
7887 
7888 	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7889 		break;
7890 
7891 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7892 		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7893 		hl_fw_unmask_irq(hdev, event_type);
7894 		break;
7895 
7896 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7897 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7898 		dev_err(hdev->dev,
7899 			"Received high temp H/W interrupt %d (cause %d)\n",
7900 			event_type, cause);
7901 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7902 		break;
7903 
7904 	case GAUDI_EVENT_DEV_RESET_REQ:
7905 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7906 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7907 		goto reset_device;
7908 
7909 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7910 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7911 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7912 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7913 		goto reset_device;
7914 
7915 	case GAUDI_EVENT_FW_ALIVE_S:
7916 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7917 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7918 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7919 		goto reset_device;
7920 
7921 	default:
7922 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7923 				event_type);
7924 		break;
7925 	}
7926 
7927 	if (event_mask)
7928 		hl_notifier_event_send_all(hdev, event_mask);
7929 
7930 	return;
7931 
7932 reset_device:
7933 	reset_required = true;
7934 
7935 	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7936 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7937 
7938 		/* notify on device unavailable while the reset is triggered by FW */
7939 		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7940 					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7941 	} else if (hdev->hard_reset_on_fw_events) {
7942 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7943 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7944 	} else {
7945 		reset_required = false;
7946 	}
7947 
7948 	if (reset_required) {
7949 		hl_device_cond_reset(hdev, flags, event_mask);
7950 	} else {
7951 		hl_fw_unmask_irq(hdev, event_type);
7952 		/* Notification on the occurred event must be sent even though no reset is executed */
7953 		if (event_mask)
7954 			hl_notifier_event_send_all(hdev, event_mask);
7955 	}
7956 }
7957 
7958 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7959 {
7960 	struct gaudi_device *gaudi = hdev->asic_specific;
7961 
7962 	if (aggregate) {
7963 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7964 		return gaudi->events_stat_aggregate;
7965 	}
7966 
7967 	*size = (u32) sizeof(gaudi->events_stat);
7968 	return gaudi->events_stat;
7969 }
7970 
7971 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7972 {
7973 	struct gaudi_device *gaudi = hdev->asic_specific;
7974 	u32 status, timeout_usec;
7975 	int rc;
7976 
7977 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7978 		hdev->reset_info.hard_reset_pending)
7979 		return 0;
7980 
7981 	if (hdev->pldm)
7982 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7983 	else
7984 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7985 
7986 	/* L0 & L1 invalidation */
7987 	WREG32(mmSTLB_INV_PS, 3);
7988 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7989 	WREG32(mmSTLB_INV_PS, 2);
7990 
7991 	rc = hl_poll_timeout(
7992 		hdev,
7993 		mmSTLB_INV_PS,
7994 		status,
7995 		!status,
7996 		1000,
7997 		timeout_usec);
7998 
7999 	WREG32(mmSTLB_INV_SET, 0);
8000 
8001 	return rc;
8002 }
8003 
8004 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8005 						bool is_hard, u32 flags,
8006 						u32 asid, u64 va, u64 size)
8007 {
8008 	/* Treat as invalidate all because there is no range invalidation
8009 	 * in Gaudi
8010 	 */
8011 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8012 }
8013 
8014 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
8015 {
8016 	u32 status, timeout_usec;
8017 	int rc;
8018 
8019 	if (hdev->pldm)
8020 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8021 	else
8022 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8023 
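	/*
	 * Program the hop0 physical address of this ASID: the PA is split
	 * across two registers (bits 43:12 and 49:44), then bit 31 of MMU_BUSY
	 * is set and polled until the HW clears it.
	 */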
8024 	WREG32(MMU_ASID, asid);
8025 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8026 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8027 	WREG32(MMU_BUSY, 0x80000000);
8028 
8029 	rc = hl_poll_timeout(
8030 		hdev,
8031 		MMU_BUSY,
8032 		status,
8033 		!(status & 0x80000000),
8034 		1000,
8035 		timeout_usec);
8036 
8037 	if (rc) {
8038 		dev_err(hdev->dev,
8039 			"Timeout during MMU hop0 config of asid %d\n", asid);
8040 		return rc;
8041 	}
8042 
8043 	return 0;
8044 }
8045 
8046 static int gaudi_send_heartbeat(struct hl_device *hdev)
8047 {
8048 	struct gaudi_device *gaudi = hdev->asic_specific;
8049 
8050 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8051 		return 0;
8052 
8053 	return hl_fw_send_heartbeat(hdev);
8054 }
8055 
8056 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8057 {
8058 	struct gaudi_device *gaudi = hdev->asic_specific;
8059 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8060 	int rc;
8061 
8062 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8063 		return 0;
8064 
8065 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8066 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8067 					mmCPU_BOOT_ERR1);
8068 	if (rc)
8069 		return rc;
8070 
8071 	if (!strlen(prop->cpucp_info.card_name))
8072 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8073 				CARD_NAME_MAX_LEN);
8074 
8075 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8076 
8077 	set_default_power_values(hdev);
8078 
8079 	return 0;
8080 }
8081 
8082 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8083 		struct engines_data *e)
8084 {
8085 	struct gaudi_device *gaudi = hdev->asic_specific;
8086 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8087 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8088 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8089 	unsigned long *mask = (unsigned long *)mask_arr;
8090 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8091 	bool is_idle = true, is_eng_idle, is_slave;
8092 	u64 offset;
8093 	int i, dma_id, port;
8094 
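	/*
	 * For every engine: read its QMAN/core status registers, fold the
	 * result into the overall idle indication, mark busy engines in the
	 * caller's mask and optionally append a formatted line to 'e'.
	 */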
8095 	if (e)
8096 		hl_engine_data_sprintf(e,
8097 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8098 			"---  -------  ------------  ----------  -------------\n");
8099 
8100 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8101 		dma_id = gaudi_dma_assignment[i];
8102 		offset = dma_id * DMA_QMAN_OFFSET;
8103 
8104 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8105 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8106 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8107 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8108 				IS_DMA_IDLE(dma_core_sts0);
8109 		is_idle &= is_eng_idle;
8110 
8111 		if (mask && !is_eng_idle)
8112 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8113 		if (e)
8114 			hl_engine_data_sprintf(e, fmt, dma_id,
8115 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8116 				qm_cgm_sts, dma_core_sts0);
8117 	}
8118 
8119 	if (e)
8120 		hl_engine_data_sprintf(e,
8121 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8122 			"---  -------  ------------  ----------  ----------\n");
8123 
8124 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8125 		offset = i * TPC_QMAN_OFFSET;
8126 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8127 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8128 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8129 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8130 				IS_TPC_IDLE(tpc_cfg_sts);
8131 		is_idle &= is_eng_idle;
8132 
8133 		if (mask && !is_eng_idle)
8134 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8135 		if (e)
8136 			hl_engine_data_sprintf(e, fmt, i,
8137 				is_eng_idle ? "Y" : "N",
8138 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8139 	}
8140 
8141 	if (e)
8142 		hl_engine_data_sprintf(e,
8143 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8144 			"---  -------  ------------  ----------  -----------\n");
8145 
8146 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8147 		offset = i * MME_QMAN_OFFSET;
8148 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8149 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8150 
8151 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8152 		is_slave = i % 2;
8153 		if (!is_slave) {
8154 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8155 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8156 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8157 		}
8158 
8159 		is_idle &= is_eng_idle;
8160 
8161 		if (mask && !is_eng_idle)
8162 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8163 		if (e) {
8164 			if (!is_slave)
8165 				hl_engine_data_sprintf(e, fmt, i,
8166 					is_eng_idle ? "Y" : "N",
8167 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8168 			else
8169 				hl_engine_data_sprintf(e, mme_slave_fmt, i,
8170 					is_eng_idle ? "Y" : "N", "-",
8171 					"-", mme_arch_sts);
8172 		}
8173 	}
8174 
8175 	if (e)
8176 		hl_engine_data_sprintf(e,
8177 				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8178 				"---  -------  ------------  ----------\n");
8179 
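	/*
	 * NIC engines are scanned per macro: each NIC macro hosts two QMANs,
	 * so every iteration covers two ports (even and odd).
	 */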
8180 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8181 		offset = i * NIC_MACRO_QMAN_OFFSET;
8182 		port = 2 * i;
8183 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8184 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8185 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8186 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8187 			is_idle &= is_eng_idle;
8188 
8189 			if (mask && !is_eng_idle)
8190 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8191 			if (e)
8192 				hl_engine_data_sprintf(e, nic_fmt, port,
8193 						is_eng_idle ? "Y" : "N",
8194 						qm_glbl_sts0, qm_cgm_sts);
8195 		}
8196 
8197 		port = 2 * i + 1;
8198 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8199 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8200 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8201 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8202 			is_idle &= is_eng_idle;
8203 
8204 			if (mask && !is_eng_idle)
8205 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8206 			if (e)
8207 				hl_engine_data_sprintf(e, nic_fmt, port,
8208 						is_eng_idle ? "Y" : "N",
8209 						qm_glbl_sts0, qm_cgm_sts);
8210 		}
8211 	}
8212 
8213 	if (e)
8214 		hl_engine_data_sprintf(e, "\n");
8215 
8216 	return is_idle;
8217 }
8218 
8219 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8220 	__acquires(&gaudi->hw_queues_lock)
8221 {
8222 	struct gaudi_device *gaudi = hdev->asic_specific;
8223 
8224 	spin_lock(&gaudi->hw_queues_lock);
8225 }
8226 
8227 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8228 	__releases(&gaudi->hw_queues_lock)
8229 {
8230 	struct gaudi_device *gaudi = hdev->asic_specific;
8231 
8232 	spin_unlock(&gaudi->hw_queues_lock);
8233 }
8234 
8235 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8236 {
8237 	return hdev->pdev->device;
8238 }
8239 
8240 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8241 				size_t max_size)
8242 {
8243 	struct gaudi_device *gaudi = hdev->asic_specific;
8244 
8245 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8246 		return 0;
8247 
8248 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8249 }
8250 
8251 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8252 {
8253 	struct gaudi_device *gaudi = hdev->asic_specific;
8254 
8255 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8256 		return 0;
8257 
8258 	return hl_fw_get_monitor_dump(hdev, data);
8259 }
8260 
8261 /*
8262  * this function should be used only during initialization and/or after reset,
8263  * when there are no active users.
8264  */
8265 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
8266 {
8267 	u64 kernel_timeout;
8268 	u32 status, offset;
8269 	int rc;
8270 
8271 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8272 
8273 	if (hdev->pldm)
8274 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8275 	else
8276 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8277 
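	/*
	 * Point both the QM kernel base and the icache base at the TPC kernel
	 * image, then invalidate and prefetch the icache before executing it.
	 */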
8278 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8279 			lower_32_bits(tpc_kernel));
8280 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8281 			upper_32_bits(tpc_kernel));
8282 
8283 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8284 			lower_32_bits(tpc_kernel));
8285 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8286 			upper_32_bits(tpc_kernel));
8287 	/* set a valid LUT pointer, content is of no significance */
8288 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8289 			lower_32_bits(tpc_kernel));
8290 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8291 			upper_32_bits(tpc_kernel));
8292 
8293 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8294 			lower_32_bits(CFG_BASE +
8295 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8296 
8297 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8298 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8299 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8300 	/* wait a bit for the engine to start executing */
8301 	usleep_range(1000, 1500);
8302 
8303 	/* wait until the icache prefetch has completed */
8304 	rc = hl_poll_timeout(
8305 		hdev,
8306 		mmTPC0_CFG_STATUS + offset,
8307 		status,
8308 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8309 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8310 		1000,
8311 		kernel_timeout);
8312 
8313 	if (rc) {
8314 		dev_err(hdev->dev,
8315 			"Timeout while waiting for TPC%d icache prefetch\n",
8316 			tpc_id);
8317 		return -EIO;
8318 	}
8319 
8320 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8321 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8322 
8323 	/* wait a bit for the engine to start executing */
8324 	usleep_range(1000, 1500);
8325 
8326 	/* wait until engine has finished executing */
8327 	rc = hl_poll_timeout(
8328 		hdev,
8329 		mmTPC0_CFG_STATUS + offset,
8330 		status,
8331 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8332 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8333 		1000,
8334 		kernel_timeout);
8335 
8336 	if (rc) {
8337 		dev_err(hdev->dev,
8338 			"Timeout while waiting for TPC%d vector pipe\n",
8339 			tpc_id);
8340 		return -EIO;
8341 	}
8342 
8343 	rc = hl_poll_timeout(
8344 		hdev,
8345 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8346 		status,
8347 		(status == 0),
8348 		1000,
8349 		kernel_timeout);
8350 
8351 	if (rc) {
8352 		dev_err(hdev->dev,
8353 			"Timeout while waiting for TPC%d kernel to execute\n",
8354 			tpc_id);
8355 		return -EIO;
8356 	}
8357 
8358 	return 0;
8359 }
8360 
8361 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8362 		struct hl_ctx *ctx)
8363 {
8364 	struct gaudi_device *gaudi = hdev->asic_specific;
8365 	int min_alloc_order, rc, collective_cb_size;
8366 
8367 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8368 		return 0;
8369 
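	/*
	 * Allocate a host-resident buffer for internal (collective) command
	 * buffers, manage it with a gen_pool allocator, reserve a host VA
	 * block for it and map it through the device MMU.
	 */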
8370 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8371 							HOST_SPACE_INTERNAL_CB_SZ,
8372 							&hdev->internal_cb_pool_dma_addr,
8373 							GFP_KERNEL | __GFP_ZERO);
8374 
8375 	if (!hdev->internal_cb_pool_virt_addr)
8376 		return -ENOMEM;
8377 
8378 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8379 			sizeof(struct packet_fence);
8380 	min_alloc_order = ilog2(collective_cb_size);
8381 
8382 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8383 	if (!hdev->internal_cb_pool) {
8384 		dev_err(hdev->dev,
8385 			"Failed to create internal CB pool\n");
8386 		rc = -ENOMEM;
8387 		goto free_internal_cb_pool;
8388 	}
8389 
8390 	rc = gen_pool_add(hdev->internal_cb_pool,
8391 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8392 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8393 	if (rc) {
8394 		dev_err(hdev->dev,
8395 			"Failed to add memory to internal CB pool\n");
8396 		rc = -EFAULT;
8397 		goto destroy_internal_cb_pool;
8398 	}
8399 
8400 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8401 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8402 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8403 
8404 	if (!hdev->internal_cb_va_base) {
8405 		rc = -ENOMEM;
8406 		goto destroy_internal_cb_pool;
8407 	}
8408 
8409 	mutex_lock(&hdev->mmu_lock);
8410 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8411 			hdev->internal_cb_pool_dma_addr,
8412 			HOST_SPACE_INTERNAL_CB_SZ);
8413 
8414 	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8415 	mutex_unlock(&hdev->mmu_lock);
8416 
8417 	if (rc)
8418 		goto unreserve_internal_cb_pool;
8419 
8420 	return 0;
8421 
8422 unreserve_internal_cb_pool:
8423 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8424 			HOST_SPACE_INTERNAL_CB_SZ);
8425 destroy_internal_cb_pool:
8426 	gen_pool_destroy(hdev->internal_cb_pool);
8427 free_internal_cb_pool:
8428 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8429 					hdev->internal_cb_pool_dma_addr);
8430 
8431 	return rc;
8432 }
8433 
8434 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8435 		struct hl_ctx *ctx)
8436 {
8437 	struct gaudi_device *gaudi = hdev->asic_specific;
8438 
8439 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8440 		return;
8441 
8442 	mutex_lock(&hdev->mmu_lock);
8443 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8444 			HOST_SPACE_INTERNAL_CB_SZ);
8445 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8446 			HOST_SPACE_INTERNAL_CB_SZ);
8447 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8448 	mutex_unlock(&hdev->mmu_lock);
8449 
8450 	gen_pool_destroy(hdev->internal_cb_pool);
8451 
8452 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8453 					hdev->internal_cb_pool_dma_addr);
8454 }
8455 
8456 static int gaudi_ctx_init(struct hl_ctx *ctx)
8457 {
8458 	int rc;
8459 
8460 	if (ctx->asid == HL_KERNEL_ASID_ID)
8461 		return 0;
8462 
8463 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8464 	if (rc)
8465 		return rc;
8466 
8467 	rc = gaudi_restore_user_registers(ctx->hdev);
8468 	if (rc)
8469 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8470 
8471 	return rc;
8472 }
8473 
8474 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8475 {
8476 	if (ctx->asid == HL_KERNEL_ASID_ID)
8477 		return;
8478 
8479 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8480 }
8481 
8482 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8483 {
8484 	return 0;
8485 }
8486 
8487 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8488 {
8489 	return gaudi_cq_assignment[cq_idx];
8490 }
8491 
8492 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8493 {
8494 	return sizeof(struct packet_msg_short) +
8495 			sizeof(struct packet_msg_prot) * 2;
8496 }
8497 
8498 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8499 {
8500 	return sizeof(struct packet_msg_short) * 4 +
8501 			sizeof(struct packet_fence) +
8502 			sizeof(struct packet_msg_prot) * 2;
8503 }
8504 
8505 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8506 {
8507 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8508 }
8509 
8510 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8511 				u32 size, bool eb)
8512 {
8513 	struct hl_cb *cb = (struct hl_cb *) data;
8514 	struct packet_msg_short *pkt;
8515 	u32 value, ctl, pkt_size = sizeof(*pkt);
8516 
8517 	pkt = cb->kernel_address + size;
8518 	memset(pkt, 0, pkt_size);
8519 
8520 	/* Inc by 1, Mode ADD */
8521 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8522 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8523 
8524 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8525 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8526 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8527 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8528 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8529 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8530 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8531 
8532 	pkt->value = cpu_to_le32(value);
8533 	pkt->ctl = cpu_to_le32(ctl);
8534 
8535 	return size + pkt_size;
8536 }
8537 
8538 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8539 					u16 addr)
8540 {
8541 	u32 ctl, pkt_size = sizeof(*pkt);
8542 
8543 	memset(pkt, 0, pkt_size);
8544 
8545 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8546 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8547 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8548 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8549 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8550 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8551 
8552 	pkt->value = cpu_to_le32(value);
8553 	pkt->ctl = cpu_to_le32(ctl);
8554 
8555 	return pkt_size;
8556 }
8557 
8558 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8559 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8560 		u16 sob_val, u16 mon_id)
8561 {
8562 	u64 monitor_base;
8563 	u32 ctl, value, pkt_size = sizeof(*pkt);
8564 	u16 msg_addr_offset;
8565 	u8 mask;
8566 
8567 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8568 		dev_err(hdev->dev,
8569 			"sob_base %u (mask %#x) is not valid\n",
8570 			sob_base, sob_mask);
8571 		return 0;
8572 	}
8573 
8574 	/*
8575 	 * monitor_base should hold the content of the base0 address registers,
8576 	 * since the msg-short address offsets are computed relative to it
8577 	 */
8578 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8579 
8580 	msg_addr_offset =
8581 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8582 				monitor_base;
8583 
8584 	memset(pkt, 0, pkt_size);
8585 
8586 	/* Monitor config packet: bind the monitor to a sync object */
8587 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8588 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8589 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8590 			0); /* GREATER OR EQUAL */
8591 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8592 
8593 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8594 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8595 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8596 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8597 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8598 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8599 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8600 
8601 	pkt->value = cpu_to_le32(value);
8602 	pkt->ctl = cpu_to_le32(ctl);
8603 
8604 	return pkt_size;
8605 }
8606 
8607 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8608 {
8609 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8610 
8611 	memset(pkt, 0, pkt_size);
8612 
8613 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8614 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8615 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8616 
8617 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8618 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8619 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8620 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8621 
8622 	pkt->cfg = cpu_to_le32(cfg);
8623 	pkt->ctl = cpu_to_le32(ctl);
8624 
8625 	return pkt_size;
8626 }
8627 
8628 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8629 {
8630 	u32 offset, nic_index;
8631 
8632 	switch (queue_id) {
8633 	case GAUDI_QUEUE_ID_DMA_0_0:
8634 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8635 		break;
8636 	case GAUDI_QUEUE_ID_DMA_0_1:
8637 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8638 		break;
8639 	case GAUDI_QUEUE_ID_DMA_0_2:
8640 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8641 		break;
8642 	case GAUDI_QUEUE_ID_DMA_0_3:
8643 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8644 		break;
8645 	case GAUDI_QUEUE_ID_DMA_1_0:
8646 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8647 		break;
8648 	case GAUDI_QUEUE_ID_DMA_1_1:
8649 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8650 		break;
8651 	case GAUDI_QUEUE_ID_DMA_1_2:
8652 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8653 		break;
8654 	case GAUDI_QUEUE_ID_DMA_1_3:
8655 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8656 		break;
8657 	case GAUDI_QUEUE_ID_DMA_5_0:
8658 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8659 		break;
8660 	case GAUDI_QUEUE_ID_DMA_5_1:
8661 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8662 		break;
8663 	case GAUDI_QUEUE_ID_DMA_5_2:
8664 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8665 		break;
8666 	case GAUDI_QUEUE_ID_DMA_5_3:
8667 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8668 		break;
8669 	case GAUDI_QUEUE_ID_TPC_7_0:
8670 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8671 		break;
8672 	case GAUDI_QUEUE_ID_TPC_7_1:
8673 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8674 		break;
8675 	case GAUDI_QUEUE_ID_TPC_7_2:
8676 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8677 		break;
8678 	case GAUDI_QUEUE_ID_TPC_7_3:
8679 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8680 		break;
8681 	case GAUDI_QUEUE_ID_NIC_0_0:
8682 	case GAUDI_QUEUE_ID_NIC_1_0:
8683 	case GAUDI_QUEUE_ID_NIC_2_0:
8684 	case GAUDI_QUEUE_ID_NIC_3_0:
8685 	case GAUDI_QUEUE_ID_NIC_4_0:
8686 	case GAUDI_QUEUE_ID_NIC_5_0:
8687 	case GAUDI_QUEUE_ID_NIC_6_0:
8688 	case GAUDI_QUEUE_ID_NIC_7_0:
8689 	case GAUDI_QUEUE_ID_NIC_8_0:
8690 	case GAUDI_QUEUE_ID_NIC_9_0:
8691 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8692 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8693 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8694 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8695 		break;
8696 	case GAUDI_QUEUE_ID_NIC_0_1:
8697 	case GAUDI_QUEUE_ID_NIC_1_1:
8698 	case GAUDI_QUEUE_ID_NIC_2_1:
8699 	case GAUDI_QUEUE_ID_NIC_3_1:
8700 	case GAUDI_QUEUE_ID_NIC_4_1:
8701 	case GAUDI_QUEUE_ID_NIC_5_1:
8702 	case GAUDI_QUEUE_ID_NIC_6_1:
8703 	case GAUDI_QUEUE_ID_NIC_7_1:
8704 	case GAUDI_QUEUE_ID_NIC_8_1:
8705 	case GAUDI_QUEUE_ID_NIC_9_1:
8706 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8707 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8708 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8709 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8710 		break;
8711 	case GAUDI_QUEUE_ID_NIC_0_2:
8712 	case GAUDI_QUEUE_ID_NIC_1_2:
8713 	case GAUDI_QUEUE_ID_NIC_2_2:
8714 	case GAUDI_QUEUE_ID_NIC_3_2:
8715 	case GAUDI_QUEUE_ID_NIC_4_2:
8716 	case GAUDI_QUEUE_ID_NIC_5_2:
8717 	case GAUDI_QUEUE_ID_NIC_6_2:
8718 	case GAUDI_QUEUE_ID_NIC_7_2:
8719 	case GAUDI_QUEUE_ID_NIC_8_2:
8720 	case GAUDI_QUEUE_ID_NIC_9_2:
8721 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8722 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8723 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8724 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8725 		break;
8726 	case GAUDI_QUEUE_ID_NIC_0_3:
8727 	case GAUDI_QUEUE_ID_NIC_1_3:
8728 	case GAUDI_QUEUE_ID_NIC_2_3:
8729 	case GAUDI_QUEUE_ID_NIC_3_3:
8730 	case GAUDI_QUEUE_ID_NIC_4_3:
8731 	case GAUDI_QUEUE_ID_NIC_5_3:
8732 	case GAUDI_QUEUE_ID_NIC_6_3:
8733 	case GAUDI_QUEUE_ID_NIC_7_3:
8734 	case GAUDI_QUEUE_ID_NIC_8_3:
8735 	case GAUDI_QUEUE_ID_NIC_9_3:
8736 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8737 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8738 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8739 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8740 		break;
8741 	default:
8742 		return -EINVAL;
8743 	}
8744 
8745 	*addr = CFG_BASE + offset;
8746 
8747 	return 0;
8748 }
8749 
8750 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8751 {
8752 	u64 monitor_base;
8753 	u32 size = 0;
8754 	u16 msg_addr_offset;
8755 
8756 	/*
8757 	 * monitor_base should hold the content of the base0 address registers,
8758 	 * since the msg-short address offsets are computed relative to it
8759 	 */
8760 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8761 
8762 	/* First monitor config packet: low address of the sync */
8763 	msg_addr_offset =
8764 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8765 				monitor_base;
8766 
8767 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8768 					msg_addr_offset);
8769 
8770 	/* Second monitor config packet: high address of the sync */
8771 	msg_addr_offset =
8772 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8773 				monitor_base;
8774 
8775 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8776 					msg_addr_offset);
8777 
8778 	/*
8779 	 * Third monitor config packet: the payload, i.e. what to write when the
8780 	 * sync triggers
8781 	 */
8782 	msg_addr_offset =
8783 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8784 				monitor_base;
8785 
8786 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8787 
8788 	return size;
8789 }
8790 
8791 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8792 				struct hl_gen_wait_properties *prop)
8793 {
8794 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8795 	void *buf = cb->kernel_address;
8796 	u64 fence_addr = 0;
8797 	u32 size = prop->size;
8798 
8799 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8800 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8801 				prop->q_idx);
8802 		return 0;
8803 	}
8804 
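	/*
	 * A wait CB consists of three monitor config packets (fence address
	 * low/high and payload), an ARM packet binding the monitor to the sync
	 * objects, and a fence packet that waits for the monitor payload.
	 */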
8805 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8806 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8807 			prop->sob_mask, prop->sob_val, prop->mon_id);
8808 	size += gaudi_add_fence_pkt(buf + size);
8809 
8810 	return size;
8811 }
8812 
8813 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8814 {
8815 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8816 
8817 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8818 		hw_sob->sob_id);
8819 
8820 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8821 			hw_sob->sob_id * 4, 0);
8822 
8823 	kref_init(&hw_sob->kref);
8824 }
8825 
8826 static u64 gaudi_get_device_time(struct hl_device *hdev)
8827 {
8828 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8829 
8830 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8831 }
8832 
8833 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8834 				u32 *block_size, u32 *block_id)
8835 {
8836 	return -EPERM;
8837 }
8838 
8839 static int gaudi_block_mmap(struct hl_device *hdev,
8840 				struct vm_area_struct *vma,
8841 				u32 block_id, u32 block_size)
8842 {
8843 	return -EPERM;
8844 }
8845 
8846 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8847 {
8848 	struct cpu_dyn_regs *dyn_regs =
8849 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8850 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8851 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8852 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8853 
8854 	WREG32(irq_handler_offset,
8855 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8856 }
8857 
8858 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8859 {
8860 	return -EINVAL;
8861 }
8862 
8863 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8864 {
8865 	switch (pll_idx) {
8866 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8867 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8868 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8869 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8870 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8871 	case HL_GAUDI_MME_PLL: return MME_PLL;
8872 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8873 	case HL_GAUDI_IF_PLL: return IF_PLL;
8874 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8875 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8876 	default: return -EINVAL;
8877 	}
8878 }
8879 
8880 static int gaudi_add_sync_to_engine_map_entry(
8881 	struct hl_sync_to_engine_map *map, u32 reg_value,
8882 	enum hl_sync_engine_type engine_type, u32 engine_id)
8883 {
8884 	struct hl_sync_to_engine_map_entry *entry;
8885 
8886 	/* The register value holds a partial address of a sync object and is
8887 	 * used as a unique identifier. Subtract the lower CFG base bits so
8888 	 * only the sync object offset remains.
8889 	 */
8890 	if (reg_value == 0 || reg_value == 0xffffffff)
8891 		return 0;
8892 	reg_value -= lower_32_bits(CFG_BASE);
8893 
8894 	/* create a new hash entry */
8895 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8896 	if (!entry)
8897 		return -ENOMEM;
8898 	entry->engine_type = engine_type;
8899 	entry->engine_id = engine_id;
8900 	entry->sync_id = reg_value;
8901 	hash_add(map->tb, &entry->node, reg_value);
8902 
8903 	return 0;
8904 }
8905 
8906 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8907 				struct hl_sync_to_engine_map *map)
8908 {
8909 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8910 	int i, j, rc;
8911 	u32 reg_value;
8912 
8913 	/* Iterate over TPC engines */
8914 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8915 
8916 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8917 					sds->props[SP_NEXT_TPC] * i);
8918 
8919 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8920 							ENGINE_TPC, i);
8921 		if (rc)
8922 			goto free_sync_to_engine_map;
8923 	}
8924 
8925 	/* Iterate over MME engines */
8926 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8927 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8928 
8929 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8930 						sds->props[SP_NEXT_MME] * i +
8931 						j * sizeof(u32));
8932 
8933 			rc = gaudi_add_sync_to_engine_map_entry(
8934 				map, reg_value, ENGINE_MME,
8935 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8936 			if (rc)
8937 				goto free_sync_to_engine_map;
8938 		}
8939 	}
8940 
8941 	/* Iterate over DMA engines */
8942 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8943 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8944 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8945 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8946 							ENGINE_DMA, i);
8947 		if (rc)
8948 			goto free_sync_to_engine_map;
8949 	}
8950 
8951 	return 0;
8952 
8953 free_sync_to_engine_map:
8954 	hl_state_dump_free_sync_to_engine_map(map);
8955 
8956 	return rc;
8957 }
8958 
8959 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8960 {
8961 	return FIELD_GET(
8962 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8963 		mon->status);
8964 }
8965 
8966 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8967 {
8968 	const size_t max_write = 10;
8969 	u32 gid, mask, sob;
8970 	int i, offset;
8971 
8972 	/* Each sync object ID is calculated as follows:
8973 	 * (8 * group_id) + the index of a cleared bit in the mask
8974 	 */
8975 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8976 			mon->arm_data);
8977 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8978 			mon->arm_data);
8979 
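	/*
	 * Walk the mask: each cleared bit denotes one monitored sync object in
	 * the group; emit the IDs as a comma-separated list.
	 */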
8980 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8981 		max_write; mask >>= 1, i++) {
8982 		if (!(mask & 1)) {
8983 			sob = gid * MONITOR_MAX_SOBS + i;
8984 
8985 			if (offset > 0)
8986 				offset += snprintf(sobs + offset, max_write,
8987 							", ");
8988 
8989 			offset += snprintf(sobs + offset, max_write, "%u", sob);
8990 		}
8991 	}
8992 }
8993 
8994 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8995 				struct hl_device *hdev,
8996 				struct hl_mon_state_dump *mon)
8997 {
8998 	const char *name;
8999 	char scratch_buf1[BIN_REG_STRING_SIZE],
9000 		scratch_buf2[BIN_REG_STRING_SIZE];
9001 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9002 
9003 	name = hl_state_dump_get_monitor_name(hdev, mon);
9004 	if (!name)
9005 		name = "";
9006 
9007 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9008 
9009 	return hl_snprintf_resize(
9010 		buf, size, offset,
9011 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9012 		mon->id, name,
9013 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9014 				mon->arm_data),
9015 		hl_format_as_binary(
9016 			scratch_buf1, sizeof(scratch_buf1),
9017 			FIELD_GET(
9018 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9019 				mon->arm_data)),
9020 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9021 				mon->arm_data),
9022 		mon->wr_data,
9023 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9024 		hl_format_as_binary(
9025 			scratch_buf2, sizeof(scratch_buf2),
9026 			FIELD_GET(
9027 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9028 				mon->status)),
9029 		monitored_sobs);
9030 }
9031 
9032 
9033 static int gaudi_print_fences_single_engine(
9034 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9035 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9036 	size_t *size, size_t *offset)
9037 {
9038 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9039 	int rc = -ENOMEM, i;
9040 	u32 *statuses, *fences;
9041 
9042 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9043 			sizeof(*statuses), GFP_KERNEL);
9044 	if (!statuses)
9045 		goto out;
9046 
9047 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9048 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
9049 			 sizeof(*fences), GFP_KERNEL);
9050 	if (!fences)
9051 		goto free_status;
9052 
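	/* Snapshot the CP status and fence counter registers before formatting */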
9053 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9054 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9055 
9056 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9057 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9058 		fences[i] = RREG32(base_offset + i * sizeof(u32));
9059 
9060 	/* The actual print */
9061 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9062 		u32 fence_id;
9063 		u64 fence_cnt, fence_rdata;
9064 		const char *engine_name;
9065 
9066 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9067 			statuses[i]))
9068 			continue;
9069 
9070 		fence_id =
9071 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9072 		fence_cnt = base_offset + CFG_BASE +
9073 			sizeof(u32) *
9074 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9075 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9076 				sds->props[SP_FENCE0_RDATA_OFFSET];
9077 		engine_name = hl_sync_engine_to_string(engine_type);
9078 
9079 		rc = hl_snprintf_resize(
9080 			buf, size, offset,
9081 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9082 			engine_name, engine_id,
9083 			i, fence_id,
9084 			fence_cnt, engine_name, engine_id, fence_id, i,
9085 			fence_rdata, engine_name, engine_id, fence_id, i,
9086 			fences[fence_id],
9087 			statuses[i]);
9088 		if (rc)
9089 			goto free_fences;
9090 	}
9091 
9092 	rc = 0;
9093 
9094 free_fences:
9095 	kfree(fences);
9096 free_status:
9097 	kfree(statuses);
9098 out:
9099 	return rc;
9100 }
9101 
9102 
9103 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9104 	.monitor_valid = gaudi_monitor_valid,
9105 	.print_single_monitor = gaudi_print_single_monitor,
9106 	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9107 	.print_fences_single_engine = gaudi_print_fences_single_engine,
9108 };
9109 
9110 static void gaudi_state_dump_init(struct hl_device *hdev)
9111 {
9112 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9113 	int i;
9114 
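	/* Populate the SOB and monitor ID-to-name lookup tables */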
9115 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9116 		hash_add(sds->so_id_to_str_tb,
9117 			&gaudi_so_id_to_str[i].node,
9118 			gaudi_so_id_to_str[i].id);
9119 
9120 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9121 		hash_add(sds->monitor_id_to_str_tb,
9122 			&gaudi_monitor_id_to_str[i].node,
9123 			gaudi_monitor_id_to_str[i].id);
9124 
9125 	sds->props = gaudi_state_dump_specs_props;
9126 
9127 	sds->sync_namager_names = gaudi_sync_manager_names;
9128 
9129 	sds->funcs = gaudi_state_dump_funcs;
9130 }
9131 
9132 static u32 *gaudi_get_stream_master_qid_arr(void)
9133 {
9134 	return gaudi_stream_master;
9135 }
9136 
9137 static int gaudi_set_dram_properties(struct hl_device *hdev)
9138 {
9139 	return 0;
9140 }
9141 
9142 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9143 {
9144 }
9145 
9146 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9147 {
9148 	struct hl_device *hdev = dev_get_drvdata(dev);
9149 	struct cpucp_info *cpucp_info;
9150 
9151 	cpucp_info = &hdev->asic_prop.cpucp_info;
9152 
9153 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9154 }
9155 
9156 static DEVICE_ATTR_RO(infineon_ver);
9157 
9158 static struct attribute *gaudi_vrm_dev_attrs[] = {
9159 	&dev_attr_infineon_ver.attr,
9160 	NULL,
9161 };
9162 
9163 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9164 					struct attribute_group *dev_vrm_attr_grp)
9165 {
9166 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9167 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9168 }
9169 
9170 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9171 {
9172 	return 0;
9173 }
9174 
9175 static const struct hl_asic_funcs gaudi_funcs = {
9176 	.early_init = gaudi_early_init,
9177 	.early_fini = gaudi_early_fini,
9178 	.late_init = gaudi_late_init,
9179 	.late_fini = gaudi_late_fini,
9180 	.sw_init = gaudi_sw_init,
9181 	.sw_fini = gaudi_sw_fini,
9182 	.hw_init = gaudi_hw_init,
9183 	.hw_fini = gaudi_hw_fini,
9184 	.halt_engines = gaudi_halt_engines,
9185 	.suspend = gaudi_suspend,
9186 	.resume = gaudi_resume,
9187 	.mmap = gaudi_mmap,
9188 	.ring_doorbell = gaudi_ring_doorbell,
9189 	.pqe_write = gaudi_pqe_write,
9190 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9191 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9192 	.scrub_device_mem = gaudi_scrub_device_mem,
9193 	.scrub_device_dram = gaudi_scrub_device_dram,
9194 	.get_int_queue_base = gaudi_get_int_queue_base,
9195 	.test_queues = gaudi_test_queues,
9196 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9197 	.asic_dma_pool_free = gaudi_dma_pool_free,
9198 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9199 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9200 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9201 	.cs_parser = gaudi_cs_parser,
9202 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
9203 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9204 	.update_eq_ci = gaudi_update_eq_ci,
9205 	.context_switch = gaudi_context_switch,
9206 	.restore_phase_topology = gaudi_restore_phase_topology,
9207 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9208 	.add_device_attr = gaudi_add_device_attr,
9209 	.handle_eqe = gaudi_handle_eqe,
9210 	.get_events_stat = gaudi_get_events_stat,
9211 	.read_pte = gaudi_read_pte,
9212 	.write_pte = gaudi_write_pte,
9213 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9214 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9215 	.mmu_prefetch_cache_range = NULL,
9216 	.send_heartbeat = gaudi_send_heartbeat,
9217 	.debug_coresight = gaudi_debug_coresight,
9218 	.is_device_idle = gaudi_is_device_idle,
9219 	.compute_reset_late_init = gaudi_compute_reset_late_init,
9220 	.hw_queues_lock = gaudi_hw_queues_lock,
9221 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9222 	.get_pci_id = gaudi_get_pci_id,
9223 	.get_eeprom_data = gaudi_get_eeprom_data,
9224 	.get_monitor_dump = gaudi_get_monitor_dump,
9225 	.send_cpu_message = gaudi_send_cpu_message,
9226 	.pci_bars_map = gaudi_pci_bars_map,
9227 	.init_iatu = gaudi_init_iatu,
9228 	.rreg = hl_rreg,
9229 	.wreg = hl_wreg,
9230 	.halt_coresight = gaudi_halt_coresight,
9231 	.ctx_init = gaudi_ctx_init,
9232 	.ctx_fini = gaudi_ctx_fini,
9233 	.pre_schedule_cs = gaudi_pre_schedule_cs,
9234 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9235 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9236 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9237 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9238 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9239 	.gen_signal_cb = gaudi_gen_signal_cb,
9240 	.gen_wait_cb = gaudi_gen_wait_cb,
9241 	.reset_sob = gaudi_reset_sob,
9242 	.reset_sob_group = gaudi_reset_sob_group,
9243 	.get_device_time = gaudi_get_device_time,
9244 	.pb_print_security_errors = NULL,
9245 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9246 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9247 	.get_dec_base_addr = NULL,
9248 	.scramble_addr = hl_mmu_scramble_addr,
9249 	.descramble_addr = hl_mmu_descramble_addr,
9250 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9251 	.get_hw_block_id = gaudi_get_hw_block_id,
9252 	.hw_block_mmap = gaudi_block_mmap,
9253 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9254 	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9255 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9256 	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9257 	.init_firmware_loader = gaudi_init_firmware_loader,
9258 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9259 	.state_dump_init = gaudi_state_dump_init,
9260 	.get_sob_addr = gaudi_get_sob_addr,
9261 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9262 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9263 	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9264 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9265 	.access_dev_mem = hl_access_dev_mem,
9266 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9267 	.send_device_activity = gaudi_send_device_activity,
9268 	.set_dram_properties = gaudi_set_dram_properties,
9269 };
9270 
9271 /**
9272  * gaudi_set_asic_funcs - set GAUDI function pointers
9273  *
9274  * @hdev: pointer to hl_device structure
9275  *
9276  */
9277 void gaudi_set_asic_funcs(struct hl_device *hdev)
9278 {
9279 	hdev->asic_funcs = &gaudi_funcs;
9280 }
9281