1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: the driver needs to parse the CB, but WREG must be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
67 
68 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
72 
73 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
82 
83 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
84 
85 #define GAUDI_MAX_STRING_LEN		20
86 
87 #define GAUDI_CB_POOL_CB_CNT		512
88 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
89 
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
91 
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
93 
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
95 
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
97 
98 #define GAUDI_ARB_WDT_TIMEOUT		0xEE6B27FF /* 8 seconds */
99 
100 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
101 
102 #define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
103 
104 #define MONITOR_SOB_STRING_SIZE		256
105 
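/*
 * Queues used as stream masters - the four streams of each of the two
 * PCI DMA channels (DMA 0 and DMA 1).
 */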
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107 	GAUDI_QUEUE_ID_DMA_0_0,
108 	GAUDI_QUEUE_ID_DMA_0_1,
109 	GAUDI_QUEUE_ID_DMA_0_2,
110 	GAUDI_QUEUE_ID_DMA_0_3,
111 	GAUDI_QUEUE_ID_DMA_1_0,
112 	GAUDI_QUEUE_ID_DMA_1_1,
113 	GAUDI_QUEUE_ID_DMA_1_2,
114 	GAUDI_QUEUE_ID_DMA_1_3
115 };
116 
117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
118 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
119 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
120 		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
121 		"gaudi cpu eq"
122 };
123 
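/*
 * Mapping of logical DMA channels to physical DMA engines:
 * DMA 0/1 serve PCI (host) transfers, DMA 2-7 serve HBM transfers.
 */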
124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
125 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
126 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
127 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
128 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
129 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
130 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
131 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
132 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
133 };
134 
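/* Completion queues are assigned only to the external (PCI DMA) queues */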
135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
136 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
137 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
138 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
139 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
140 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
141 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
142 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
143 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
144 };
145 
146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
147 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
148 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
149 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
150 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
151 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
152 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
153 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
154 	[PACKET_FENCE]		= sizeof(struct packet_fence),
155 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
156 	[PACKET_NOP]		= sizeof(struct packet_nop),
157 	[PACKET_STOP]		= sizeof(struct packet_stop),
158 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
159 	[PACKET_WAIT]		= sizeof(struct packet_wait),
160 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
161 };
162 
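/* Return true if @id is a packet opcode recognized by the driver */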
163 static inline bool validate_packet_id(enum packet_id id)
164 {
165 	switch (id) {
166 	case PACKET_WREG_32:
167 	case PACKET_WREG_BULK:
168 	case PACKET_MSG_LONG:
169 	case PACKET_MSG_SHORT:
170 	case PACKET_CP_DMA:
171 	case PACKET_REPEAT:
172 	case PACKET_MSG_PROT:
173 	case PACKET_FENCE:
174 	case PACKET_LIN_DMA:
175 	case PACKET_NOP:
176 	case PACKET_STOP:
177 	case PACKET_ARB_POINT:
178 	case PACKET_WAIT:
179 	case PACKET_LOAD_AND_EXE:
180 		return true;
181 	default:
182 		return false;
183 	}
184 }
185 
186 static const char * const
187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
188 	"tpc_address_exceed_slm",
189 	"tpc_div_by_0",
190 	"tpc_spu_mac_overflow",
191 	"tpc_spu_addsub_overflow",
192 	"tpc_spu_abs_overflow",
193 	"tpc_spu_fp_dst_nan_inf",
194 	"tpc_spu_fp_dst_denorm",
195 	"tpc_vpu_mac_overflow",
196 	"tpc_vpu_addsub_overflow",
197 	"tpc_vpu_abs_overflow",
198 	"tpc_vpu_fp_dst_nan_inf",
199 	"tpc_vpu_fp_dst_denorm",
200 	"tpc_assertions",
201 	"tpc_illegal_instruction",
202 	"tpc_pc_wrap_around",
203 	"tpc_qm_sw_err",
204 	"tpc_hbw_rresp_err",
205 	"tpc_hbw_bresp_err",
206 	"tpc_lbw_rresp_err",
207 	"tpc_lbw_bresp_err"
208 };
209 
210 static const char * const
211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
212 	"PQ AXI HBW error",
213 	"CQ AXI HBW error",
214 	"CP AXI HBW error",
215 	"CP error due to undefined OPCODE",
216 	"CP encountered STOP OPCODE",
217 	"CP AXI LBW error",
218 	"CP WRREG32 or WRBULK returned error",
219 	"N/A",
220 	"FENCE 0 inc over max value and clipped",
221 	"FENCE 1 inc over max value and clipped",
222 	"FENCE 2 inc over max value and clipped",
223 	"FENCE 3 inc over max value and clipped",
224 	"FENCE 0 dec under min value and clipped",
225 	"FENCE 1 dec under min value and clipped",
226 	"FENCE 2 dec under min value and clipped",
227 	"FENCE 3 dec under min value and clipped"
228 };
229 
230 static const char * const
231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
232 	"Choice push while full error",
233 	"Choice Q watchdog error",
234 	"MSG AXI LBW returned with error"
235 };
236 
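/*
 * Type of each H/W queue: the PCI DMA queues (DMA 0/1) are external queues,
 * the CPU PQ goes to the device CPU, and all other engine queues are internal.
 */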
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352 
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354 	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355 	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356 	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357 	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358 	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359 	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360 	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361 	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362 	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363 	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364 	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365 	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366 	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367 	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368 	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369 	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370 	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371 	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372 	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373 	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374 	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375 	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376 	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377 	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378 	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379 	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380 	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382 
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384 	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385 	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
386 	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387 	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388 	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389 	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390 	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391 	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392 	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393 	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394 	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396 
397 static s64 gaudi_state_dump_specs_props[] = {
398 	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399 	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400 	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401 	[SP_MON_OBJ_WR_ADDR_LOW] =
402 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403 	[SP_MON_OBJ_WR_ADDR_HIGH] =
404 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405 	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406 	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407 	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408 	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409 	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410 	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411 	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412 	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413 	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414 	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415 	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416 	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417 	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418 	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419 	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420 	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421 	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422 	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423 	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424 	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425 	[SP_FENCE0_CNT_OFFSET] =
426 		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427 	[SP_FENCE0_RDATA_OFFSET] =
428 		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429 	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430 	[SP_NUM_CORES] = 1,
431 };
432 
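/*
 * Engine that serves each queue ID. The CPU PQ is not backed by a compute
 * engine, so it maps to the out-of-range value GAUDI_ENGINE_ID_SIZE.
 */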
433 static const int gaudi_queue_id_to_engine_id[] = {
434 	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
435 	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
436 	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
437 	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
438 	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
439 	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
440 	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
441 	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
442 	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
443 	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
444 	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
445 	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
446 	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
447 	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
448 	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
449 	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
450 	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
451 	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
452 	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
453 	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
454 	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
455 	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
456 	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
457 	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
458 	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
459 	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
460 	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
461 	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
462 	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
463 };
464 
465 /* The order here is opposite to the order of the indexing in the h/w.
466  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
467  */
468 static const char * const gaudi_sync_manager_names[] = {
469 	"SYNC_MGR_E_N",
470 	"SYNC_MGR_W_N",
471 	"SYNC_MGR_E_S",
472 	"SYNC_MGR_W_S",
473 	NULL
474 };
475 
476 struct ecc_info_extract_params {
477 	u64 block_address;
478 	u32 num_memories;
479 	bool derr;
480 };
481 
482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
483 								u64 phys_addr);
484 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
485 					struct hl_cs_job *job);
486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
487 					u32 size, u64 val);
488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
489 					u32 num_regs, u32 val);
490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
491 				u32 tpc_id);
492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
493 static int gaudi_cpucp_info_get(struct hl_device *hdev);
494 static void gaudi_disable_clock_gating(struct hl_device *hdev);
495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
497 				u32 size, bool eb);
498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
499 				struct hl_gen_wait_properties *prop);
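
/*
 * External queues act as collective masters. The DMA5, TPC7 and NIC queues
 * act as collective slaves. All other queues do not support collective
 * operations.
 */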
500 static inline enum hl_collective_mode
501 get_collective_mode(struct hl_device *hdev, u32 queue_id)
502 {
503 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
504 		return HL_COLLECTIVE_MASTER;
505 
506 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
507 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
508 		return HL_COLLECTIVE_SLAVE;
509 
510 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
511 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
512 		return HL_COLLECTIVE_SLAVE;
513 
514 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
515 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
516 		return HL_COLLECTIVE_SLAVE;
517 
518 	return HL_COLLECTIVE_NOT_SUPPORTED;
519 }
520 
521 static inline void set_default_power_values(struct hl_device *hdev)
522 {
523 	struct asic_fixed_properties *prop = &hdev->asic_prop;
524 
525 	if (hdev->card_type == cpucp_card_type_pmc) {
526 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
527 
528 		if (prop->fw_security_enabled)
529 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
530 		else
531 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
532 	} else {
533 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
534 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
535 	}
536 }
537 
538 static int gaudi_set_fixed_properties(struct hl_device *hdev)
539 {
540 	struct asic_fixed_properties *prop = &hdev->asic_prop;
541 	u32 num_sync_stream_queues = 0;
542 	int i;
543 
544 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
545 	prop->hw_queues_props = kcalloc(prop->max_queues,
546 			sizeof(struct hw_queue_properties),
547 			GFP_KERNEL);
548 
549 	if (!prop->hw_queues_props)
550 		return -ENOMEM;
551 
552 	for (i = 0 ; i < prop->max_queues ; i++) {
553 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
554 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
555 			prop->hw_queues_props[i].driver_only = 0;
556 			prop->hw_queues_props[i].supports_sync_stream = 1;
557 			prop->hw_queues_props[i].cb_alloc_flags =
558 				CB_ALLOC_KERNEL;
559 			num_sync_stream_queues++;
560 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
561 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
562 			prop->hw_queues_props[i].driver_only = 1;
563 			prop->hw_queues_props[i].supports_sync_stream = 0;
564 			prop->hw_queues_props[i].cb_alloc_flags =
565 				CB_ALLOC_KERNEL;
566 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
567 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
568 			prop->hw_queues_props[i].driver_only = 0;
569 			prop->hw_queues_props[i].supports_sync_stream = 0;
570 			prop->hw_queues_props[i].cb_alloc_flags =
571 				CB_ALLOC_USER;
572 
573 		}
574 		prop->hw_queues_props[i].collective_mode =
575 						get_collective_mode(hdev, i);
576 	}
577 
578 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
579 	prop->cfg_base_address = CFG_BASE;
580 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
581 	prop->host_base_address = HOST_PHYS_BASE;
582 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
583 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
584 	prop->completion_mode = HL_COMPLETION_MODE_JOB;
585 	prop->collective_first_sob = 0;
586 	prop->collective_first_mon = 0;
587 
588 	/* 2 SOBs per internal queue stream are reserved for collective */
589 	prop->sync_stream_first_sob =
590 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
591 			* QMAN_STREAMS * HL_RSVD_SOBS;
592 
593 	/* 1 monitor per internal queue stream is reserved for collective
594 	 * 2 monitors per external queue stream are reserved for collective
595 	 */
596 	prop->sync_stream_first_mon =
597 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
598 			(NUMBER_OF_EXT_HW_QUEUES * 2);
599 
600 	prop->dram_base_address = DRAM_PHYS_BASE;
601 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
602 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
603 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
604 
605 	prop->sram_base_address = SRAM_BASE_ADDR;
606 	prop->sram_size = SRAM_SIZE;
607 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
608 	prop->sram_user_base_address =
609 			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
610 
611 	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
612 	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
613 
614 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
615 	if (hdev->pldm)
616 		prop->mmu_pgt_size = 0x800000; /* 8MB */
617 	else
618 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
619 	prop->mmu_pte_size = HL_PTE_SIZE;
620 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
621 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
622 	prop->dram_page_size = PAGE_SIZE_2MB;
623 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
624 	prop->dram_supports_virtual_memory = false;
625 
626 	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
627 	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
628 	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
629 	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
630 	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
631 	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
632 	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
633 	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
634 	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
635 	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
636 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
637 	prop->pmmu.end_addr =
638 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
639 	prop->pmmu.page_size = PAGE_SIZE_4KB;
640 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
641 	prop->pmmu.last_mask = LAST_MASK;
642 	/* TODO: will be duplicated until implementing per-MMU props */
643 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
644 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
645 
646 	/* PMMU and HPMMU are the same except for the page size */
647 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
648 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
649 
650 	/* shifts and masks are the same in PMMU and DMMU */
651 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
652 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
653 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
654 	prop->dmmu.page_size = PAGE_SIZE_2MB;
655 
656 	prop->cfg_size = CFG_SIZE;
657 	prop->max_asid = MAX_ASID;
658 	prop->num_of_events = GAUDI_EVENT_SIZE;
659 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
660 
661 	set_default_power_values(hdev);
662 
663 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
664 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
665 
666 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
667 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
668 
669 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
670 					CARD_NAME_MAX_LEN);
671 
672 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
673 
674 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
675 			prop->sync_stream_first_sob +
676 			(num_sync_stream_queues * HL_RSVD_SOBS);
677 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
678 			prop->sync_stream_first_mon +
679 			(num_sync_stream_queues * HL_RSVD_MONS);
680 
681 	prop->first_available_user_interrupt = USHRT_MAX;
682 
683 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
684 		prop->first_available_cq[i] = USHRT_MAX;
685 
686 	prop->fw_cpu_boot_dev_sts0_valid = false;
687 	prop->fw_cpu_boot_dev_sts1_valid = false;
688 	prop->hard_reset_done_by_fw = false;
689 	prop->gic_interrupts_enable = true;
690 
691 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
692 
693 	prop->clk_pll_index = HL_GAUDI_MME_PLL;
694 	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
695 
696 	prop->use_get_power_for_reset_history = true;
697 
698 	prop->configurable_stop_on_err = true;
699 
700 	prop->set_max_power_on_device_init = true;
701 
702 	prop->dma_mask = 48;
703 
704 	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
705 
706 	return 0;
707 }
708 
709 static int gaudi_pci_bars_map(struct hl_device *hdev)
710 {
711 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
712 	bool is_wc[3] = {false, false, true};
713 	int rc;
714 
715 	rc = hl_pci_bars_map(hdev, name, is_wc);
716 	if (rc)
717 		return rc;
718 
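	/*
	 * The CFG BAR maps the device address space starting at
	 * SPI_FLASH_BASE_ADDR, so offset it to get a pointer to CFG_BASE.
	 */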
719 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
720 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
721 
722 	return 0;
723 }
724 
725 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
726 {
727 	struct gaudi_device *gaudi = hdev->asic_specific;
728 	struct hl_inbound_pci_region pci_region;
729 	u64 old_addr = addr;
730 	int rc;
731 
732 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
733 		return old_addr;
734 
735 	if (hdev->asic_prop.iatu_done_by_fw)
736 		return U64_MAX;
737 
738 	/* Inbound Region 2 - Bar 4 - Point to HBM */
739 	pci_region.mode = PCI_BAR_MATCH_MODE;
740 	pci_region.bar = HBM_BAR_ID;
741 	pci_region.addr = addr;
742 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
743 	if (rc)
744 		return U64_MAX;
745 
746 	if (gaudi) {
747 		old_addr = gaudi->hbm_bar_cur_addr;
748 		gaudi->hbm_bar_cur_addr = addr;
749 	}
750 
751 	return old_addr;
752 }
753 
754 static int gaudi_init_iatu(struct hl_device *hdev)
755 {
756 	struct hl_inbound_pci_region inbound_region;
757 	struct hl_outbound_pci_region outbound_region;
758 	int rc;
759 
760 	if (hdev->asic_prop.iatu_done_by_fw)
761 		return 0;
762 
763 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
764 	inbound_region.mode = PCI_BAR_MATCH_MODE;
765 	inbound_region.bar = SRAM_BAR_ID;
766 	inbound_region.addr = SRAM_BASE_ADDR;
767 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
768 	if (rc)
769 		goto done;
770 
771 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
772 	inbound_region.mode = PCI_BAR_MATCH_MODE;
773 	inbound_region.bar = CFG_BAR_ID;
774 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
775 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
776 	if (rc)
777 		goto done;
778 
779 	/* Inbound Region 2 - Bar 4 - Point to HBM */
780 	inbound_region.mode = PCI_BAR_MATCH_MODE;
781 	inbound_region.bar = HBM_BAR_ID;
782 	inbound_region.addr = DRAM_PHYS_BASE;
783 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
784 	if (rc)
785 		goto done;
786 
787 	/* Outbound Region 0 - Point to Host */
788 	outbound_region.addr = HOST_PHYS_BASE;
789 	outbound_region.size = HOST_PHYS_SIZE;
790 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
791 
792 done:
793 	return rc;
794 }
795 
796 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
797 {
798 	return RREG32(mmHW_STATE);
799 }
800 
801 static int gaudi_early_init(struct hl_device *hdev)
802 {
803 	struct asic_fixed_properties *prop = &hdev->asic_prop;
804 	struct pci_dev *pdev = hdev->pdev;
805 	resource_size_t pci_bar_size;
806 	u32 fw_boot_status;
807 	int rc;
808 
809 	rc = gaudi_set_fixed_properties(hdev);
810 	if (rc) {
811 		dev_err(hdev->dev, "Failed setting fixed properties\n");
812 		return rc;
813 	}
814 
815 	/* Check BAR sizes */
816 	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
817 
818 	if (pci_bar_size != SRAM_BAR_SIZE) {
819 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
820 			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
821 		rc = -ENODEV;
822 		goto free_queue_props;
823 	}
824 
825 	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
826 
827 	if (pci_bar_size != CFG_BAR_SIZE) {
828 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
829 			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
830 		rc = -ENODEV;
831 		goto free_queue_props;
832 	}
833 
834 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
835 	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
836 
837 	/* If FW security is enabled at this point it means no access to ELBI */
838 	if (hdev->asic_prop.fw_security_enabled) {
839 		hdev->asic_prop.iatu_done_by_fw = true;
840 
841 		/*
842 		 * The GIC security bit can ONLY be set by CPUCP, so at this stage
843 		 * the decision can only be based on PCI ID security.
844 		 */
845 		hdev->asic_prop.gic_interrupts_enable = false;
846 		goto pci_init;
847 	}
848 
849 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
850 				&fw_boot_status);
851 	if (rc)
852 		goto free_queue_props;
853 
854 	/* Check whether FW is configuring iATU */
855 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
856 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
857 		hdev->asic_prop.iatu_done_by_fw = true;
858 
859 pci_init:
860 	rc = hl_pci_init(hdev);
861 	if (rc)
862 		goto free_queue_props;
863 
864 	/* Before continuing with initialization, we need to read the preboot
865 	 * version to determine whether we are running with security-enabled firmware
866 	 */
867 	rc = hl_fw_read_preboot_status(hdev);
868 	if (rc) {
869 		if (hdev->reset_on_preboot_fail)
870 			hdev->asic_funcs->hw_fini(hdev, true, false);
871 		goto pci_fini;
872 	}
873 
874 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
875 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
876 		hdev->asic_funcs->hw_fini(hdev, true, false);
877 	}
878 
879 	return 0;
880 
881 pci_fini:
882 	hl_pci_fini(hdev);
883 free_queue_props:
884 	kfree(hdev->asic_prop.hw_queues_props);
885 	return rc;
886 }
887 
888 static int gaudi_early_fini(struct hl_device *hdev)
889 {
890 	kfree(hdev->asic_prop.hw_queues_props);
891 	hl_pci_fini(hdev);
892 
893 	return 0;
894 }
895 
896 /**
897  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
898  *
899  * @hdev: pointer to hl_device structure
900  * Return: 0 on success, negative value on error.
901  */
902 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
903 {
904 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
905 	struct asic_fixed_properties *prop = &hdev->asic_prop;
906 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
907 	int rc;
908 
909 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
910 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
911 		struct gaudi_device *gaudi = hdev->asic_specific;
912 
913 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
914 			return 0;
915 
916 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
917 
918 		if (rc)
919 			return rc;
920 
921 		freq = pll_freq_arr[2];
922 	} else {
923 		/* Backward compatibility */
924 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
925 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
926 		nr = RREG32(mmPSOC_CPU_PLL_NR);
927 		nf = RREG32(mmPSOC_CPU_PLL_NF);
928 		od = RREG32(mmPSOC_CPU_PLL_OD);
929 
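		/*
		 * PLL output frequency is ref_clk * (NF + 1) / ((NR + 1) * (OD + 1)).
		 * div_sel selects either the reference clock or the PLL clock,
		 * optionally divided by (div_fctr + 1).
		 */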
930 		if (div_sel == DIV_SEL_REF_CLK ||
931 				div_sel == DIV_SEL_DIVIDED_REF) {
932 			if (div_sel == DIV_SEL_REF_CLK)
933 				freq = PLL_REF_CLK;
934 			else
935 				freq = PLL_REF_CLK / (div_fctr + 1);
936 		} else if (div_sel == DIV_SEL_PLL_CLK ||
937 			div_sel == DIV_SEL_DIVIDED_PLL) {
938 			pll_clk = PLL_REF_CLK * (nf + 1) /
939 					((nr + 1) * (od + 1));
940 			if (div_sel == DIV_SEL_PLL_CLK)
941 				freq = pll_clk;
942 			else
943 				freq = pll_clk / (div_fctr + 1);
944 		} else {
945 			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
946 			freq = 0;
947 		}
948 	}
949 
950 	prop->psoc_timestamp_frequency = freq;
951 	prop->psoc_pci_pll_nr = nr;
952 	prop->psoc_pci_pll_nf = nf;
953 	prop->psoc_pci_pll_od = od;
954 	prop->psoc_pci_pll_div_factor = div_fctr;
955 
956 	return 0;
957 }
958 
959 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
960 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
961 {
962 	struct asic_fixed_properties *prop = &hdev->asic_prop;
963 	struct packet_lin_dma *init_tpc_mem_pkt;
964 	struct hl_cs_job *job;
965 	struct hl_cb *cb;
966 	u64 dst_addr;
967 	u32 cb_size, ctl;
968 	u8 tpc_id;
969 	int rc;
970 
971 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
972 	if (!cb)
973 		return -EFAULT;
974 
975 	init_tpc_mem_pkt = cb->kernel_address;
976 	cb_size = sizeof(*init_tpc_mem_pkt);
977 	memset(init_tpc_mem_pkt, 0, cb_size);
978 
979 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
980 
981 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
982 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
983 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
984 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
985 
986 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
987 
988 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
989 
990 	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
991 	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
992 				round_up(prop->sram_user_base_address, SZ_8K));
993 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
994 
995 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
996 	if (!job) {
997 		dev_err(hdev->dev, "Failed to allocate a new job\n");
998 		rc = -ENOMEM;
999 		goto release_cb;
1000 	}
1001 
1002 	job->id = 0;
1003 	job->user_cb = cb;
1004 	atomic_inc(&job->user_cb->cs_cnt);
1005 	job->user_cb_size = cb_size;
1006 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1007 	job->patched_cb = job->user_cb;
1008 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1009 
1010 	hl_debugfs_add_job(hdev, job);
1011 
1012 	rc = gaudi_send_job_on_qman0(hdev, job);
1013 
1014 	if (rc)
1015 		goto free_job;
1016 
1017 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1018 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1019 		if (rc)
1020 			break;
1021 	}
1022 
1023 free_job:
1024 	hl_userptr_delete_list(hdev, &job->userptr_list);
1025 	hl_debugfs_remove_job(hdev, job);
1026 	kfree(job);
1027 	atomic_dec(&cb->cs_cnt);
1028 
1029 release_cb:
1030 	hl_cb_put(cb);
1031 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1032 
1033 	return rc;
1034 }
1035 
1036 /*
1037  * gaudi_init_tpc_mem() - Initialize TPC memories.
1038  * @hdev: Pointer to hl_device structure.
1039  *
1040  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1041  *
1042  * Return: 0 for success, negative value for error.
1043  */
1044 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1045 {
1046 	const struct firmware *fw;
1047 	size_t fw_size;
1048 	void *cpu_addr;
1049 	dma_addr_t dma_handle;
1050 	int rc, count = 5;
1051 
1052 again:
1053 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1054 	if (rc == -EINTR && count-- > 0) {
1055 		msleep(50);
1056 		goto again;
1057 	}
1058 
1059 	if (rc) {
1060 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1061 				GAUDI_TPC_FW_FILE);
1062 		goto out;
1063 	}
1064 
1065 	fw_size = fw->size;
1066 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1067 	if (!cpu_addr) {
1068 		dev_err(hdev->dev,
1069 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1070 			fw_size);
1071 		rc = -ENOMEM;
1072 		goto out;
1073 	}
1074 
1075 	memcpy(cpu_addr, fw->data, fw_size);
1076 
1077 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1078 
1079 	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1080 
1081 out:
1082 	release_firmware(fw);
1083 	return rc;
1084 }
1085 
1086 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1087 {
1088 	struct gaudi_device *gaudi = hdev->asic_specific;
1089 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1090 	struct hl_hw_queue *q;
1091 	u32 i, sob_id, sob_group_id, queue_id;
1092 
1093 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1094 	sob_group_id =
1095 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1096 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1097 
1098 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
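	/* NIC queue IDs are grouped as four streams per engine, hence the stride of 4 */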
1099 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1100 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1101 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1102 	}
1103 
1104 	/* Both DMA5 and TPC7 use the same resources since only a single
1105 	 * engine needs to participate in the reduction process
1106 	 */
1107 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1108 	q = &hdev->kernel_queues[queue_id];
1109 	q->sync_stream_prop.collective_sob_id =
1110 			sob_id + NIC_NUMBER_OF_ENGINES;
1111 
1112 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1113 	q = &hdev->kernel_queues[queue_id];
1114 	q->sync_stream_prop.collective_sob_id =
1115 			sob_id + NIC_NUMBER_OF_ENGINES;
1116 }
1117 
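/* Clear all SOBs in the group (each SOB register is 4 bytes) and re-arm the group's refcount */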
1118 static void gaudi_sob_group_hw_reset(struct kref *ref)
1119 {
1120 	struct gaudi_hw_sob_group *hw_sob_group =
1121 		container_of(ref, struct gaudi_hw_sob_group, kref);
1122 	struct hl_device *hdev = hw_sob_group->hdev;
1123 	int i;
1124 
1125 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1126 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1127 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1128 
1129 	kref_init(&hw_sob_group->kref);
1130 }
1131 
1132 static void gaudi_sob_group_reset_error(struct kref *ref)
1133 {
1134 	struct gaudi_hw_sob_group *hw_sob_group =
1135 		container_of(ref, struct gaudi_hw_sob_group, kref);
1136 	struct hl_device *hdev = hw_sob_group->hdev;
1137 
1138 	dev_crit(hdev->dev,
1139 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1140 		hw_sob_group->base_sob_id);
1141 }
1142 
1143 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1144 {
1145 	struct gaudi_collective_properties *prop;
1146 	int i;
1147 
1148 	prop = &gaudi->collective_props;
1149 
1150 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1151 
1152 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1153 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1154 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1155 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1156 	/* Set collective engine bit */
1157 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1158 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1159 }
1160 
1161 static int gaudi_collective_init(struct hl_device *hdev)
1162 {
1163 	u32 i, sob_id, reserved_sobs_per_group;
1164 	struct gaudi_collective_properties *prop;
1165 	struct gaudi_device *gaudi;
1166 
1167 	gaudi = hdev->asic_specific;
1168 	prop = &gaudi->collective_props;
1169 	sob_id = hdev->asic_prop.collective_first_sob;
1170 
1171 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1172 	reserved_sobs_per_group =
1173 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1174 
1175 	/* Init SOB groups */
1176 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1177 		prop->hw_sob_group[i].hdev = hdev;
1178 		prop->hw_sob_group[i].base_sob_id = sob_id;
1179 		sob_id += reserved_sobs_per_group;
1180 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1181 	}
1182 
1183 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1184 		prop->next_sob_group_val[i] = 1;
1185 		prop->curr_sob_group_idx[i] = 0;
1186 		gaudi_collective_map_sobs(hdev, i);
1187 	}
1188 
1189 	gaudi_collective_mstr_sob_mask_set(gaudi);
1190 
1191 	return 0;
1192 }
1193 
1194 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1195 {
1196 	struct gaudi_device *gaudi = hdev->asic_specific;
1197 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1198 
1199 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1200 					gaudi_sob_group_hw_reset);
1201 }
1202 
1203 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1204 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1205 {
1206 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1207 	struct gaudi_collective_properties *cprop;
1208 	struct hl_gen_wait_properties wait_prop;
1209 	struct hl_sync_stream_properties *prop;
1210 	struct gaudi_device *gaudi;
1211 
1212 	gaudi = hdev->asic_specific;
1213 	cprop = &gaudi->collective_props;
1214 	queue_id = job->hw_queue_id;
1215 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1216 
1217 	master_sob_base =
1218 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1219 	master_monitor = prop->collective_mstr_mon_id[0];
1220 
1221 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1222 
1223 	dev_dbg(hdev->dev,
1224 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1225 		master_sob_base, cprop->mstr_sob_mask[0],
1226 		cprop->next_sob_group_val[stream],
1227 		master_monitor, queue_id);
1228 
1229 	wait_prop.data = (void *) job->patched_cb;
1230 	wait_prop.sob_base = master_sob_base;
1231 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1232 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1233 	wait_prop.mon_id = master_monitor;
1234 	wait_prop.q_idx = queue_id;
1235 	wait_prop.size = cb_size;
1236 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1237 
1238 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1239 	master_monitor = prop->collective_mstr_mon_id[1];
1240 
1241 	dev_dbg(hdev->dev,
1242 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1243 		master_sob_base, cprop->mstr_sob_mask[1],
1244 		cprop->next_sob_group_val[stream],
1245 		master_monitor, queue_id);
1246 
1247 	wait_prop.sob_base = master_sob_base;
1248 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1249 	wait_prop.mon_id = master_monitor;
1250 	wait_prop.size = cb_size;
1251 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1252 }
1253 
1254 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1255 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1256 {
1257 	struct hl_gen_wait_properties wait_prop;
1258 	struct hl_sync_stream_properties *prop;
1259 	u32 queue_id, cb_size = 0;
1260 
1261 	queue_id = job->hw_queue_id;
1262 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1263 
1264 	if (job->cs->encaps_signals) {
1265 		/* use the encaps signal handle stored earlier in the flow
1266 		 * and set the SOB information from the encaps
1267 		 * signals handle
1268 		 */
1269 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1270 						cs_cmpl);
1271 
1272 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1273 				job->cs->sequence,
1274 				cs_cmpl->hw_sob->sob_id,
1275 				cs_cmpl->sob_val);
1276 	}
1277 
1278 	/* Add to wait CBs using slave monitor */
1279 	wait_prop.data = (void *) job->user_cb;
1280 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1281 	wait_prop.sob_mask = 0x1;
1282 	wait_prop.sob_val = cs_cmpl->sob_val;
1283 	wait_prop.mon_id = prop->collective_slave_mon_id;
1284 	wait_prop.q_idx = queue_id;
1285 	wait_prop.size = cb_size;
1286 
1287 	dev_dbg(hdev->dev,
1288 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1289 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1290 		prop->collective_slave_mon_id, queue_id);
1291 
1292 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1293 
1294 	dev_dbg(hdev->dev,
1295 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1296 		prop->collective_sob_id, queue_id);
1297 
1298 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1299 			prop->collective_sob_id, cb_size, false);
1300 }
1301 
1302 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1303 {
1304 	struct hl_cs_compl *signal_cs_cmpl =
1305 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1306 	struct hl_cs_compl *cs_cmpl =
1307 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1308 	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1309 	struct gaudi_collective_properties *cprop;
1310 	u32 stream, queue_id, sob_group_offset;
1311 	struct gaudi_device *gaudi;
1312 	struct hl_device *hdev;
1313 	struct hl_cs_job *job;
1314 	struct hl_ctx *ctx;
1315 
1316 	ctx = cs->ctx;
1317 	hdev = ctx->hdev;
1318 	gaudi = hdev->asic_specific;
1319 	cprop = &gaudi->collective_props;
1320 
1321 	if (cs->encaps_signals) {
1322 		cs_cmpl->hw_sob = handle->hw_sob;
1323 		/* At this checkpoint we only need the hw_sob pointer
1324 		 * for the completion check before going over the jobs
1325 		 * of the master/slaves; the sob_value will be taken later on
1326 		 * in gaudi_collective_slave_init_job, depending on each
1327 		 * job's wait offset value.
1328 		 */
1329 		cs_cmpl->sob_val = 0;
1330 	} else {
1331 		/* copy the SOB id and value of the signal CS */
1332 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1333 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1334 	}
1335 
1336 	/* Check again if the signal CS has already completed.
1337 	 * If it has, don't send any wait CS, since the hw_sob
1338 	 * could already be in reset. If the signal is not completed,
1339 	 * take a refcount on the hw_sob to prevent resetting the SOB
1340 	 * while the wait CS is not yet submitted.
1341 	 * Note that this check is protected by two locks,
1342 	 * the hw queue lock and the completion object lock,
1343 	 * and the same completion object lock also protects
1344 	 * the hw_sob reset handler function.
1345 	 * The hw_queue lock prevents the hw_sob refcount value,
1346 	 * which is changed by the signal/wait flows, from going out of sync.
1347 	 */
1348 	spin_lock(&signal_cs_cmpl->lock);
1349 
1350 	if (completion_done(&cs->signal_fence->completion)) {
1351 		spin_unlock(&signal_cs_cmpl->lock);
1352 		return -EINVAL;
1353 	}
1354 	/* Increment kref since all slave queues are now waiting on it */
1355 	kref_get(&cs_cmpl->hw_sob->kref);
1356 
1357 	spin_unlock(&signal_cs_cmpl->lock);
1358 
1359 	/* Calculate the stream from collective master queue (1st job) */
1360 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1361 	stream = job->hw_queue_id % 4;
1362 	sob_group_offset =
1363 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1364 
1365 	list_for_each_entry(job, &cs->job_list, cs_node) {
1366 		queue_id = job->hw_queue_id;
1367 
1368 		if (hdev->kernel_queues[queue_id].collective_mode ==
1369 				HL_COLLECTIVE_MASTER)
1370 			gaudi_collective_master_init_job(hdev, job, stream,
1371 						sob_group_offset);
1372 		else
1373 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1374 	}
1375 
1376 	cs_cmpl->sob_group = sob_group_offset;
1377 
1378 	/* Handle sob group kref and wraparound */
1379 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1380 	cprop->next_sob_group_val[stream]++;
1381 
1382 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1383 		/*
1384 		 * Decrement as we reached the max value.
1385 		 * The release function won't be called here as we've
1386 		 * just incremented the refcount.
1387 		 */
1388 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1389 				gaudi_sob_group_reset_error);
1390 		cprop->next_sob_group_val[stream] = 1;
1391 		/* only two SOB groups are currently in use */
1392 		cprop->curr_sob_group_idx[stream] =
1393 			(cprop->curr_sob_group_idx[stream] + 1) &
1394 							(HL_RSVD_SOBS - 1);
1395 
1396 		gaudi_collective_map_sobs(hdev, stream);
1397 
1398 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1399 				cprop->curr_sob_group_idx[stream], stream);
1400 	}
1401 
1402 	mb();
1403 	hl_fence_put(cs->signal_fence);
1404 	cs->signal_fence = NULL;
1405 
1406 	return 0;
1407 }
1408 
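/*
 * Extra space that must be reserved in a patched CB for the two MSG_PROT
 * packets the driver appends. If the user CB plus the two packets would
 * extend past the cache line in which the user CB ends, pad up to that
 * cache-line boundary first.
 * For example (assuming a 128-byte device cache line and 16-byte MSG_PROT
 * packets): a 120-byte user CB returns (128 - 120) + 32 = 40 bytes, while
 * a 64-byte user CB returns just the 32 bytes of the two packets.
 */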
1409 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1410 {
1411 	u32 cacheline_end, additional_commands;
1412 
1413 	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1414 	additional_commands = sizeof(struct packet_msg_prot) * 2;
1415 
1416 	if (user_cb_size + additional_commands > cacheline_end)
1417 		return cacheline_end - user_cb_size + additional_commands;
1418 	else
1419 		return additional_commands;
1420 }
1421 
1422 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1423 		struct hl_ctx *ctx, struct hl_cs *cs,
1424 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1425 		u32 encaps_signal_offset)
1426 {
1427 	struct hw_queue_properties *hw_queue_prop;
1428 	struct hl_cs_counters_atomic *cntr;
1429 	struct hl_cs_job *job;
1430 	struct hl_cb *cb;
1431 	u32 cb_size;
1432 	bool patched_cb;
1433 
1434 	cntr = &hdev->aggregated_cs_counters;
1435 
1436 	if (mode == HL_COLLECTIVE_MASTER) {
1437 		/* CB size of collective master queue contains
1438 		 * 4 msg short packets for monitor 1 configuration
1439 		 * 1 fence packet
1440 		 * 4 msg short packets for monitor 2 configuration
1441 		 * 1 fence packet
1442 		 * 2 msg prot packets for completion and MSI
1443 		 */
1444 		cb_size = sizeof(struct packet_msg_short) * 8 +
1445 				sizeof(struct packet_fence) * 2 +
1446 				sizeof(struct packet_msg_prot) * 2;
1447 		patched_cb = true;
1448 	} else {
1449 		/* CB size of collective slave queues contains
1450 		 * 4 msg short packets for monitor configuration
1451 		 * 1 fence packet
1452 		 * 1 additional msg short packet for sob signal
1453 		 */
1454 		cb_size = sizeof(struct packet_msg_short) * 5 +
1455 				sizeof(struct packet_fence);
1456 		patched_cb = false;
1457 	}
1458 
1459 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1460 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1461 	if (!job) {
1462 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1463 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1464 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1465 		return -ENOMEM;
1466 	}
1467 
1468 	/* Allocate internal mapped CB for non patched CBs */
1469 	cb = hl_cb_kernel_create(hdev, cb_size,
1470 			hdev->mmu_enable && !patched_cb);
1471 	if (!cb) {
1472 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1473 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1474 		kfree(job);
1475 		return -EFAULT;
1476 	}
1477 
1478 	job->id = 0;
1479 	job->cs = cs;
1480 	job->user_cb = cb;
1481 	atomic_inc(&job->user_cb->cs_cnt);
1482 	job->user_cb_size = cb_size;
1483 	job->hw_queue_id = queue_id;
1484 
1485 	/* since it's guaranteed to have only one chunk in the collective wait
1486 	 * cs, we can use this chunk to set the encapsulated signal offset
1487 	 * in the jobs.
1488 	 */
1489 	if (cs->encaps_signals)
1490 		job->encaps_sig_wait_offset = encaps_signal_offset;
1491 
1492 	/*
1493 	 * No need for parsing - the user CB is the patched CB.
1494 	 * We call hl_cb_destroy() for two reasons: we don't need
1495 	 * the CB in the CB idr anymore, and we want to decrement its refcount
1496 	 * as it was incremented inside hl_cb_kernel_create().
1497 	 */
1498 	if (patched_cb)
1499 		job->patched_cb = job->user_cb;
1500 	else
1501 		job->patched_cb = NULL;
1502 
1503 	job->job_cb_size = job->user_cb_size;
1504 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1505 
1506 	/* increment refcount as for external queues we get completion */
1507 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1508 		cs_get(cs);
1509 
1510 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1511 
1512 	list_add_tail(&job->cs_node, &cs->job_list);
1513 
1514 	hl_debugfs_add_job(hdev, job);
1515 
1516 	return 0;
1517 }
1518 
1519 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1520 		struct hl_ctx *ctx, struct hl_cs *cs,
1521 		u32 wait_queue_id, u32 collective_engine_id,
1522 		u32 encaps_signal_offset)
1523 {
1524 	struct gaudi_device *gaudi = hdev->asic_specific;
1525 	struct hw_queue_properties *hw_queue_prop;
1526 	u32 queue_id, collective_queue, num_jobs;
1527 	u32 stream, nic_queue, nic_idx = 0;
1528 	bool skip;
1529 	int i, rc = 0;
1530 
1531 	/* Verify wait queue id is configured as master */
1532 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1533 	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1534 		dev_err(hdev->dev,
1535 			"Queue %d is not configured as collective master\n",
1536 			wait_queue_id);
1537 		return -EINVAL;
1538 	}
1539 
1540 	/* Verify engine id is supported */
1541 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1542 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1543 		dev_err(hdev->dev,
1544 			"Collective wait does not support engine %u\n",
1545 			collective_engine_id);
1546 		return -EINVAL;
1547 	}
1548 
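	/* Each engine QMAN exposes QMAN_STREAMS (4) streams and the queue IDs
	 * are grouped 4 per engine, so the stream index is simply the wait
	 * queue ID modulo 4.
	 */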
1549 	stream = wait_queue_id % 4;
1550 
1551 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1552 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1553 	else
1554 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1555 
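	/* One job for the collective master plus one slave job per SOB in the
	 * group, i.e. the NIC engines and the reduction engine (DMA5/TPC7).
	 */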
1556 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1557 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1558 
1559 	/* First job goes to the collective master queue, it will wait for
1560 	 * the collective slave queues to finish execution.
1561 	 * The synchronization is done using two monitors:
1562 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1563 	 * reduction engine (DMA5/TPC7).
1564 	 *
	 * The rest of the jobs go to the collective slave queues, which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1567 	 */
1568 	for (i = 0 ; i < num_jobs ; i++) {
1569 		if (i == 0) {
1570 			queue_id = wait_queue_id;
1571 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1572 				HL_COLLECTIVE_MASTER, queue_id,
1573 				wait_queue_id, encaps_signal_offset);
1574 		} else {
1575 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1576 				if (gaudi->hw_cap_initialized &
1577 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1578 					skip = false;
1579 				else
1580 					skip = true;
1581 
1582 				queue_id = nic_queue;
1583 				nic_queue += 4;
1584 				nic_idx++;
1585 
1586 				if (skip)
1587 					continue;
1588 			} else {
1589 				queue_id = collective_queue;
1590 			}
1591 
1592 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1593 				HL_COLLECTIVE_SLAVE, queue_id,
1594 				wait_queue_id, encaps_signal_offset);
1595 		}
1596 
1597 		if (rc)
1598 			return rc;
1599 	}
1600 
1601 	return rc;
1602 }
1603 
1604 static int gaudi_late_init(struct hl_device *hdev)
1605 {
1606 	struct gaudi_device *gaudi = hdev->asic_specific;
1607 	int rc;
1608 
1609 	rc = gaudi->cpucp_info_get(hdev);
1610 	if (rc) {
1611 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1612 		return rc;
1613 	}
1614 
1615 	if ((hdev->card_type == cpucp_card_type_pci) &&
1616 			(hdev->nic_ports_mask & 0x3)) {
1617 		dev_info(hdev->dev,
1618 			"PCI card detected, only 8 ports are enabled\n");
1619 		hdev->nic_ports_mask &= ~0x3;
1620 
1621 		/* Stop and disable unused NIC QMANs */
1622 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1623 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1624 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1625 
1626 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1627 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1628 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1629 
1630 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1631 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1632 
1633 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1634 	}
1635 
1636 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1637 	if (rc) {
1638 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1639 		return rc;
1640 	}
1641 
1642 	/* Scrub both SRAM and DRAM */
1643 	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1644 	if (rc)
1645 		goto disable_pci_access;
1646 
1647 	rc = gaudi_fetch_psoc_frequency(hdev);
1648 	if (rc) {
1649 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1650 		goto disable_pci_access;
1651 	}
1652 
1653 	rc = gaudi_mmu_clear_pgt_range(hdev);
1654 	if (rc) {
1655 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1656 		goto disable_pci_access;
1657 	}
1658 
1659 	rc = gaudi_init_tpc_mem(hdev);
1660 	if (rc) {
1661 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1662 		goto disable_pci_access;
1663 	}
1664 
1665 	rc = gaudi_collective_init(hdev);
1666 	if (rc) {
1667 		dev_err(hdev->dev, "Failed to init collective\n");
1668 		goto disable_pci_access;
1669 	}
1670 
1671 	/* We only support a single ASID for the user, so for the sake of optimization, just
1672 	 * initialize the ASID one time during device initialization with the fixed value of 1
1673 	 */
1674 	gaudi_mmu_prepare(hdev, 1);
1675 
1676 	hl_fw_set_pll_profile(hdev);
1677 
1678 	return 0;
1679 
1680 disable_pci_access:
1681 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1682 
1683 	return rc;
1684 }
1685 
1686 static void gaudi_late_fini(struct hl_device *hdev)
1687 {
1688 	hl_hwmon_release_resources(hdev);
1689 }
1690 
1691 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1692 {
1693 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1694 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1695 	int i, j, rc = 0;
1696 
1697 	/*
1698 	 * The device CPU works with 40-bits addresses, while bit 39 must be set
1699 	 * to '1' when accessing the host.
1700 	 * Bits 49:39 of the full host address are saved for a later
1701 	 * configuration of the HW to perform extension to 50 bits.
1702 	 * Because there is a single HW register that holds the extension bits,
1703 	 * these bits must be identical in all allocated range.
1704 	 */
1705 
1706 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1707 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1708 								&dma_addr_arr[i],
1709 								GFP_KERNEL | __GFP_ZERO);
1710 		if (!virt_addr_arr[i]) {
1711 			rc = -ENOMEM;
1712 			goto free_dma_mem_arr;
1713 		}
1714 
1715 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1716 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1717 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1718 			break;
1719 	}
1720 
1721 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1722 		dev_err(hdev->dev,
1723 			"MSB of CPU accessible DMA memory are not identical in all range\n");
1724 		rc = -EFAULT;
1725 		goto free_dma_mem_arr;
1726 	}
1727 
1728 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1729 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1730 	hdev->cpu_pci_msb_addr =
1731 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1732 
1733 	if (!hdev->asic_prop.fw_security_enabled)
1734 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1735 
1736 free_dma_mem_arr:
1737 	for (j = 0 ; j < i ; j++)
1738 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1739 						dma_addr_arr[j]);
1740 
1741 	return rc;
1742 }
1743 
1744 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1745 {
1746 	struct gaudi_device *gaudi = hdev->asic_specific;
1747 	struct gaudi_internal_qman_info *q;
1748 	u32 i;
1749 
1750 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1751 		q = &gaudi->internal_qmans[i];
1752 		if (!q->pq_kernel_addr)
1753 			continue;
1754 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1755 	}
1756 }
1757 
1758 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1759 {
1760 	struct gaudi_device *gaudi = hdev->asic_specific;
1761 	struct gaudi_internal_qman_info *q;
1762 	int rc, i;
1763 
1764 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1765 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1766 			continue;
1767 
1768 		q = &gaudi->internal_qmans[i];
1769 
1770 		switch (i) {
1771 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1772 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1773 			break;
1774 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1775 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1776 			break;
1777 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1778 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1779 			break;
1780 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1781 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1782 			break;
1783 		default:
1784 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1785 			rc = -EINVAL;
1786 			goto free_internal_qmans_pq_mem;
1787 		}
1788 
1789 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1790 								GFP_KERNEL | __GFP_ZERO);
1791 		if (!q->pq_kernel_addr) {
1792 			rc = -ENOMEM;
1793 			goto free_internal_qmans_pq_mem;
1794 		}
1795 	}
1796 
1797 	return 0;
1798 
1799 free_internal_qmans_pq_mem:
1800 	gaudi_free_internal_qmans_pq_mem(hdev);
1801 	return rc;
1802 }
1803 
1804 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1805 {
1806 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1807 	struct pci_mem_region *region;
1808 
1809 	/* CFG */
1810 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1811 	region->region_base = CFG_BASE;
1812 	region->region_size = CFG_SIZE;
1813 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1814 	region->bar_size = CFG_BAR_SIZE;
1815 	region->bar_id = CFG_BAR_ID;
1816 	region->used = 1;
1817 
1818 	/* SRAM */
1819 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1820 	region->region_base = SRAM_BASE_ADDR;
1821 	region->region_size = SRAM_SIZE;
1822 	region->offset_in_bar = 0;
1823 	region->bar_size = SRAM_BAR_SIZE;
1824 	region->bar_id = SRAM_BAR_ID;
1825 	region->used = 1;
1826 
1827 	/* DRAM */
1828 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1829 	region->region_base = DRAM_PHYS_BASE;
1830 	region->region_size = hdev->asic_prop.dram_size;
1831 	region->offset_in_bar = 0;
1832 	region->bar_size = prop->dram_pci_bar_size;
1833 	region->bar_id = HBM_BAR_ID;
1834 	region->used = 1;
1835 
1836 	/* SP SRAM */
1837 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1838 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1839 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1840 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1841 	region->bar_size = CFG_BAR_SIZE;
1842 	region->bar_id = CFG_BAR_ID;
1843 	region->used = 1;
1844 }
1845 
1846 static int gaudi_sw_init(struct hl_device *hdev)
1847 {
1848 	struct gaudi_device *gaudi;
1849 	u32 i, event_id = 0;
1850 	int rc;
1851 
1852 	/* Allocate device structure */
1853 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1854 	if (!gaudi)
1855 		return -ENOMEM;
1856 
1857 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1858 		if (gaudi_irq_map_table[i].valid) {
1859 			if (event_id == GAUDI_EVENT_SIZE) {
1860 				dev_err(hdev->dev,
1861 					"Event array exceeds the limit of %u events\n",
1862 					GAUDI_EVENT_SIZE);
1863 				rc = -EINVAL;
1864 				goto free_gaudi_device;
1865 			}
1866 
1867 			gaudi->events[event_id++] =
1868 					gaudi_irq_map_table[i].fc_id;
1869 		}
1870 	}
1871 
1872 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1873 
1874 	hdev->asic_specific = gaudi;
1875 
1876 	/* Create DMA pool for small allocations */
1877 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1878 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1879 	if (!hdev->dma_pool) {
1880 		dev_err(hdev->dev, "failed to create DMA pool\n");
1881 		rc = -ENOMEM;
1882 		goto free_gaudi_device;
1883 	}
1884 
1885 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1886 	if (rc)
1887 		goto free_dma_pool;
1888 
1889 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1890 	if (!hdev->cpu_accessible_dma_pool) {
1891 		dev_err(hdev->dev,
1892 			"Failed to create CPU accessible DMA pool\n");
1893 		rc = -ENOMEM;
1894 		goto free_cpu_dma_mem;
1895 	}
1896 
1897 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1898 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1899 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1900 	if (rc) {
1901 		dev_err(hdev->dev,
1902 			"Failed to add memory to CPU accessible DMA pool\n");
1903 		rc = -EFAULT;
1904 		goto free_cpu_accessible_dma_pool;
1905 	}
1906 
1907 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1908 	if (rc)
1909 		goto free_cpu_accessible_dma_pool;
1910 
1911 	spin_lock_init(&gaudi->hw_queues_lock);
1912 
1913 	hdev->supports_sync_stream = true;
1914 	hdev->supports_coresight = true;
1915 	hdev->supports_staged_submission = true;
1916 	hdev->supports_wait_for_multi_cs = true;
1917 
1918 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1919 	hdev->stream_master_qid_arr =
1920 				hdev->asic_funcs->get_stream_master_qid_arr();
1921 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1922 
1923 	return 0;
1924 
1925 free_cpu_accessible_dma_pool:
1926 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1927 free_cpu_dma_mem:
1928 	if (!hdev->asic_prop.fw_security_enabled)
1929 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1930 					hdev->cpu_pci_msb_addr);
1931 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1932 					hdev->cpu_accessible_dma_address);
1933 free_dma_pool:
1934 	dma_pool_destroy(hdev->dma_pool);
1935 free_gaudi_device:
1936 	kfree(gaudi);
1937 	return rc;
1938 }
1939 
1940 static int gaudi_sw_fini(struct hl_device *hdev)
1941 {
1942 	struct gaudi_device *gaudi = hdev->asic_specific;
1943 
1944 	gaudi_free_internal_qmans_pq_mem(hdev);
1945 
1946 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1947 
1948 	if (!hdev->asic_prop.fw_security_enabled)
1949 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1950 					hdev->cpu_pci_msb_addr);
1951 
1952 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1953 					hdev->cpu_accessible_dma_address);
1954 
1955 	dma_pool_destroy(hdev->dma_pool);
1956 
1957 	kfree(gaudi);
1958 
1959 	return 0;
1960 }
1961 
1962 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1963 {
1964 	struct hl_device *hdev = arg;
1965 	int i;
1966 
1967 	if (hdev->disabled)
1968 		return IRQ_HANDLED;
1969 
1970 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1971 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1972 
1973 	hl_irq_handler_eq(irq, &hdev->event_queue);
1974 
1975 	return IRQ_HANDLED;
1976 }
1977 
1978 /*
1979  * For backward compatibility, new MSI interrupts should be set after the
1980  * existing CPU and NIC interrupts.
1981  */
1982 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1983 				bool cpu_eq)
1984 {
1985 	int msi_vec;
1986 
1987 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1988 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1989 				GAUDI_EVENT_QUEUE_MSI_IDX);
1990 
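	/* Vectors below GAUDI_EVENT_QUEUE_MSI_IDX, as well as the CPU EQ
	 * vector itself, map 1:1. Any newer interrupt is pushed past the CPU
	 * EQ and NIC vectors, hence the NIC_NUMBER_OF_ENGINES + 1 offset.
	 */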
1991 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1992 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1993 
1994 	return pci_irq_vector(hdev->pdev, msi_vec);
1995 }
1996 
1997 static int gaudi_enable_msi_single(struct hl_device *hdev)
1998 {
1999 	int rc, irq;
2000 
2001 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2002 
2003 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2004 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2005 			"gaudi single msi", hdev);
2006 	if (rc)
2007 		dev_err(hdev->dev,
2008 			"Failed to request single MSI IRQ\n");
2009 
2010 	return rc;
2011 }
2012 
2013 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2014 {
2015 	int cq_cnt = hdev->asic_prop.completion_queues_count;
2016 	int rc, i, irq_cnt_init, irq;
2017 
2018 	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2019 		irq = gaudi_pci_irq_vector(hdev, i, false);
2020 		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2021 				&hdev->completion_queue[i]);
2022 		if (rc) {
2023 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2024 			goto free_irqs;
2025 		}
2026 	}
2027 
2028 	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2029 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2030 				&hdev->event_queue);
2031 	if (rc) {
2032 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2033 		goto free_irqs;
2034 	}
2035 
2036 	return 0;
2037 
2038 free_irqs:
2039 	for (i = 0 ; i < irq_cnt_init ; i++)
2040 		free_irq(gaudi_pci_irq_vector(hdev, i, false),
2041 				&hdev->completion_queue[i]);
2042 	return rc;
2043 }
2044 
2045 static int gaudi_enable_msi(struct hl_device *hdev)
2046 {
2047 	struct gaudi_device *gaudi = hdev->asic_specific;
2048 	int rc;
2049 
2050 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2051 		return 0;
2052 
2053 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2054 	if (rc < 0) {
2055 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2056 		return rc;
2057 	}
2058 
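	/* pci_alloc_irq_vectors() returns the number of vectors actually
	 * allocated. If fewer than NUMBER_OF_INTERRUPTS are available, fall
	 * back to a single MSI that serves all completion queues and the
	 * event queue.
	 */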
2059 	if (rc < NUMBER_OF_INTERRUPTS) {
2060 		gaudi->multi_msi_mode = false;
2061 		rc = gaudi_enable_msi_single(hdev);
2062 	} else {
2063 		gaudi->multi_msi_mode = true;
2064 		rc = gaudi_enable_msi_multi(hdev);
2065 	}
2066 
2067 	if (rc)
2068 		goto free_pci_irq_vectors;
2069 
2070 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2071 
2072 	return 0;
2073 
2074 free_pci_irq_vectors:
2075 	pci_free_irq_vectors(hdev->pdev);
2076 	return rc;
2077 }
2078 
2079 static void gaudi_sync_irqs(struct hl_device *hdev)
2080 {
2081 	struct gaudi_device *gaudi = hdev->asic_specific;
2082 	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2083 
2084 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2085 		return;
2086 
2087 	/* Wait for all pending IRQs to be finished */
2088 	if (gaudi->multi_msi_mode) {
2089 		for (i = 0 ; i < cq_cnt ; i++)
2090 			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2091 
2092 		synchronize_irq(gaudi_pci_irq_vector(hdev,
2093 						GAUDI_EVENT_QUEUE_MSI_IDX,
2094 						true));
2095 	} else {
2096 		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2097 	}
2098 }
2099 
2100 static void gaudi_disable_msi(struct hl_device *hdev)
2101 {
2102 	struct gaudi_device *gaudi = hdev->asic_specific;
2103 	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2104 
2105 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2106 		return;
2107 
2108 	gaudi_sync_irqs(hdev);
2109 
2110 	if (gaudi->multi_msi_mode) {
2111 		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2112 						true);
2113 		free_irq(irq, &hdev->event_queue);
2114 
2115 		for (i = 0 ; i < cq_cnt ; i++) {
2116 			irq = gaudi_pci_irq_vector(hdev, i, false);
2117 			free_irq(irq, &hdev->completion_queue[i]);
2118 		}
2119 	} else {
2120 		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2121 	}
2122 
2123 	pci_free_irq_vectors(hdev->pdev);
2124 
2125 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2126 }
2127 
2128 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2129 {
2130 	struct gaudi_device *gaudi = hdev->asic_specific;
2131 
2132 	if (hdev->asic_prop.fw_security_enabled)
2133 		return;
2134 
2135 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2136 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2137 		return;
2138 
2139 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2140 		return;
2141 
2142 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2143 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2144 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2145 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2146 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2147 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2148 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2149 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2150 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2151 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2152 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2153 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2154 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2155 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2156 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2157 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2158 
2159 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2160 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2161 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2162 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2163 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2164 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2165 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2166 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2167 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2168 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2169 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2170 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2171 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2172 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2173 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2174 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2175 
2176 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2177 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2178 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2179 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2180 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2181 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2182 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2183 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2184 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2185 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2186 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2187 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2188 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2189 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2190 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2191 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2192 
2193 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2194 }
2195 
2196 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2197 {
2198 	struct gaudi_device *gaudi = hdev->asic_specific;
2199 
2200 	if (hdev->asic_prop.fw_security_enabled)
2201 		return;
2202 
2203 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2204 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2205 		return;
2206 
2207 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2208 		return;
2209 
2210 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2211 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2213 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2215 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2217 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2219 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2221 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2223 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2225 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2226 
2227 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2228 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2229 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2230 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2231 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2232 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2233 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2234 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2235 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2236 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2237 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2238 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2239 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2240 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2241 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2242 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2243 
2244 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2245 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2246 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2247 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2248 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2249 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2250 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2251 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2252 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2253 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2254 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2255 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2256 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2257 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2258 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2259 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2260 
2261 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2262 }
2263 
2264 static void gaudi_init_e2e(struct hl_device *hdev)
2265 {
2266 	if (hdev->asic_prop.fw_security_enabled)
2267 		return;
2268 
2269 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2270 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2271 		return;
2272 
2273 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2274 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2275 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2276 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2277 
2278 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2279 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2280 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2281 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2282 
2283 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2284 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2285 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2286 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2287 
2288 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2289 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2290 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2291 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2292 
2293 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2294 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2295 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2296 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2297 
2298 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2299 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2300 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2301 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2302 
2303 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2304 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2305 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2306 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2307 
2308 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2309 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2310 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2311 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2312 
2313 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2314 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2315 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2316 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2317 
2318 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2319 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2320 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2321 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2322 
2323 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2324 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2325 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2326 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2327 
2328 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2329 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2330 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2331 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2332 
2333 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2334 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2335 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2336 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2337 
2338 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2339 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2340 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2341 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2342 
2343 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2344 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2345 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2346 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2347 
2348 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2349 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2350 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2351 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2352 
2353 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2354 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2355 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2356 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2357 
2358 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2359 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2360 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2361 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2362 
2363 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2364 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2365 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2366 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2367 
2368 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2369 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2370 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2371 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2372 
2373 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2374 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2375 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2376 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2377 
2378 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2379 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2380 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2381 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2382 
2383 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2384 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2385 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2386 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2387 
2388 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2389 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2390 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2391 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2392 
2393 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2394 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2395 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2396 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2397 
2398 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2399 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2400 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2401 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2402 
2403 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2404 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2405 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2406 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2407 
2408 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2409 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2410 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2411 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2412 
2413 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2414 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2415 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2416 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2417 
2418 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2419 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2420 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2421 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2422 
2423 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2424 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2425 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2426 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2427 
2428 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2429 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2430 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2431 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2432 
2433 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2434 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2435 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2436 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2437 
2438 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2439 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2440 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2441 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2442 
2443 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2444 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2445 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2446 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2447 
2448 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2449 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2450 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2451 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2452 
2453 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2454 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2455 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2456 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2457 
2458 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2459 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2460 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2461 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2462 
2463 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2464 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2465 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2466 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2467 
2468 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2469 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2470 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2471 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2472 
2473 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2474 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2475 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2476 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2477 
2478 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2479 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2480 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2481 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2482 
2483 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2484 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2485 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2486 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2487 
2488 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2489 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2490 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2491 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2492 
2493 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2494 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2495 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2496 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2497 
2498 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2499 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2500 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2501 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2502 
2503 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2504 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2505 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2506 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2507 
2508 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2509 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2510 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2511 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2512 }
2513 
2514 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2515 {
2516 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2517 
2518 	if (hdev->asic_prop.fw_security_enabled)
2519 		return;
2520 
2521 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2522 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2523 		return;
2524 
2525 	hbm0_wr = 0x33333333;
2526 	hbm0_rd = 0x77777777;
2527 	hbm1_wr = 0x55555555;
2528 	hbm1_rd = 0xDDDDDDDD;
2529 
2530 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2531 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2532 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2533 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2534 
2535 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2536 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2537 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2538 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2539 
2540 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2541 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2542 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2543 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2544 
2545 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2546 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2547 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2548 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2549 
2550 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2551 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2552 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2553 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2554 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2555 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2556 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2557 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2558 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2559 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2560 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2561 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2562 
2563 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2564 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2565 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2566 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2567 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2568 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2569 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2570 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2571 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2572 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2573 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2574 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2575 }
2576 
2577 static void gaudi_init_golden_registers(struct hl_device *hdev)
2578 {
2579 	u32 tpc_offset;
2580 	int tpc_id, i;
2581 
2582 	gaudi_init_e2e(hdev);
2583 	gaudi_init_hbm_cred(hdev);
2584 
2585 	for (tpc_id = 0, tpc_offset = 0;
2586 				tpc_id < TPC_NUMBER_OF_ENGINES;
2587 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2588 		/* Mask all arithmetic interrupts from TPC */
2589 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2590 		/* Set 16 cache lines */
2591 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2592 				ICACHE_FETCH_LINE_NUM, 2);
2593 	}
2594 
	/* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2596 	for (i = 0 ; i < 128 ; i += 8)
2597 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2598 
2599 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2600 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2601 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2602 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2603 }
2604 
2605 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2606 					int qman_id, dma_addr_t qman_pq_addr)
2607 {
2608 	struct cpu_dyn_regs *dyn_regs =
2609 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2610 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2611 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2612 	u32 q_off, dma_qm_offset;
2613 	u32 dma_qm_err_cfg, irq_handler_offset;
2614 
2615 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2616 
2617 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2618 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2619 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2620 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2621 	so_base_en_lo = lower_32_bits(CFG_BASE +
2622 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2623 	so_base_en_hi = upper_32_bits(CFG_BASE +
2624 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2625 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2626 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2627 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2628 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2629 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2630 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2631 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2632 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2633 
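	/* Per-stream QMAN registers (e.g. PQ_BASE_LO_0..3) are consecutive
	 * 32-bit registers, so stream 'qman_id' is reached by adding
	 * qman_id * 4 bytes to the register offset.
	 */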
2634 	q_off = dma_qm_offset + qman_id * 4;
2635 
2636 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2637 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2638 
2639 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2640 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2641 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2642 
2643 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2644 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2645 							QMAN_LDMA_SRC_OFFSET);
2646 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2647 							QMAN_LDMA_DST_OFFSET);
2648 
2649 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2650 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2651 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2652 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2653 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2654 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2655 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2656 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2657 
2658 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2659 
2660 	/* The following configuration is needed only once per QMAN */
2661 	if (qman_id == 0) {
2662 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2663 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2664 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2665 
2666 		/* Configure RAZWI IRQ */
2667 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2668 		if (hdev->stop_on_err)
2669 			dma_qm_err_cfg |=
2670 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2671 
2672 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2673 
2674 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2675 			lower_32_bits(CFG_BASE + irq_handler_offset));
2676 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2677 			upper_32_bits(CFG_BASE + irq_handler_offset));
2678 
2679 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2680 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2681 									dma_id);
2682 
2683 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2684 				QM_ARB_ERR_MSG_EN_MASK);
2685 
2686 		/* Set timeout to maximum */
2687 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2688 
2689 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2690 				QMAN_EXTERNAL_MAKE_TRUSTED);
2691 
2692 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2693 	}
2694 }
2695 
2696 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2697 {
2698 	struct cpu_dyn_regs *dyn_regs =
2699 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2700 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2701 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2702 	u32 irq_handler_offset;
2703 
2704 	/* Set to maximum possible according to physical size */
2705 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2706 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2707 
2708 	/* WA for H/W bug H3-2116 */
2709 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2710 
	/* STOP_ON bit means no completion for the operation in case of a RAZWI */
2712 	if (hdev->stop_on_err)
2713 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2714 
2715 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2716 
2717 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2718 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2719 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2720 
2721 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2722 		lower_32_bits(CFG_BASE + irq_handler_offset));
2723 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2724 		upper_32_bits(CFG_BASE + irq_handler_offset));
2725 
2726 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2727 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2728 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2729 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2730 	/* If the channel is secured, it should be in MMU bypass mode */
2731 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2732 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2733 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2734 }
2735 
2736 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2737 				u32 enable_mask)
2738 {
2739 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2740 
2741 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2742 }
2743 
2744 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2745 {
2746 	struct gaudi_device *gaudi = hdev->asic_specific;
2747 	struct hl_hw_queue *q;
2748 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2749 
2750 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2751 		return;
2752 
2753 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2754 		dma_id = gaudi_dma_assignment[i];
2755 		/*
2756 		 * For queues after the CPU Q need to add 1 to get the correct
2757 		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
2758 		 * order to get the correct MSI register.
2759 		 */
2760 		if (dma_id > 1) {
2761 			cpu_skip = 1;
2762 			nic_skip = NIC_NUMBER_OF_ENGINES;
2763 		} else {
2764 			cpu_skip = 0;
2765 			nic_skip = 0;
2766 		}
2767 
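		/* For example (assuming the first PCI DMA channel is DMA
		 * engine 0), its four streams use kernel queues 0-3, while a
		 * channel located after the CPU queue is shifted by one slot
		 * and its MSI vectors also skip the CPU EQ and NIC vectors.
		 */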
2768 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2769 			q_idx = 4 * dma_id + j + cpu_skip;
2770 			q = &hdev->kernel_queues[q_idx];
2771 			q->cq_id = cq_id++;
2772 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2773 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2774 						q->bus_address);
2775 		}
2776 
2777 		gaudi_init_dma_core(hdev, dma_id);
2778 
2779 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2780 	}
2781 
2782 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2783 }
2784 
2785 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2786 					int qman_id, u64 qman_base_addr)
2787 {
2788 	struct cpu_dyn_regs *dyn_regs =
2789 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2790 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2791 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2792 	u32 dma_qm_err_cfg, irq_handler_offset;
2793 	u32 q_off, dma_qm_offset;
2794 
2795 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2796 
2797 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2798 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2799 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2800 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2801 	so_base_en_lo = lower_32_bits(CFG_BASE +
2802 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2803 	so_base_en_hi = upper_32_bits(CFG_BASE +
2804 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2805 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2806 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2807 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2808 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2809 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2810 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2811 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2812 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2813 
2814 	q_off = dma_qm_offset + qman_id * 4;
2815 
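	/* Streams 0-3 are the upper CPs and each gets a PQ in host memory.
	 * qman_id 4 configures the lower CP, which has no PQ and instead
	 * gets the error/RAZWI reporting configuration below.
	 */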
2816 	if (qman_id < 4) {
2817 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2818 					lower_32_bits(qman_base_addr));
2819 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2820 					upper_32_bits(qman_base_addr));
2821 
2822 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2823 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2824 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2825 
2826 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2827 							QMAN_CPDMA_SIZE_OFFSET);
2828 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2829 							QMAN_CPDMA_SRC_OFFSET);
2830 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2831 							QMAN_CPDMA_DST_OFFSET);
2832 	} else {
2833 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2834 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2835 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2836 
2837 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2838 							QMAN_LDMA_SIZE_OFFSET);
2839 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2840 							QMAN_LDMA_SRC_OFFSET);
2841 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2842 							QMAN_LDMA_DST_OFFSET);
2843 
2844 		/* Configure RAZWI IRQ */
2845 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2846 		if (hdev->stop_on_err)
2847 			dma_qm_err_cfg |=
2848 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2849 
2850 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2851 
2852 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2853 			lower_32_bits(CFG_BASE + irq_handler_offset));
2854 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2855 			upper_32_bits(CFG_BASE + irq_handler_offset));
2856 
2857 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2858 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2859 									dma_id);
2860 
2861 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2862 				QM_ARB_ERR_MSG_EN_MASK);
2863 
2864 		/* Set timeout to maximum */
2865 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2866 
2867 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2868 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2869 				QMAN_INTERNAL_MAKE_TRUSTED);
2870 	}
2871 
2872 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2873 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2874 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2875 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2876 
2877 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2878 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2879 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2880 				mtr_base_ws_lo);
2881 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2882 				mtr_base_ws_hi);
2883 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2884 				so_base_ws_lo);
2885 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2886 				so_base_ws_hi);
2887 	}
2888 }
2889 
2890 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2891 {
2892 	struct gaudi_device *gaudi = hdev->asic_specific;
2893 	struct gaudi_internal_qman_info *q;
2894 	u64 qman_base_addr;
2895 	int i, j, dma_id, internal_q_index;
2896 
2897 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2898 		return;
2899 
2900 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2901 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2902 
2903 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2904 			 /*
2905 			  * Add the CPU queue in order to get the correct queue
2906 			  * number as all internal queue are placed after it
2907 			  */
2908 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2909 
2910 			q = &gaudi->internal_qmans[internal_q_index];
2911 			qman_base_addr = (u64) q->pq_dma_addr;
2912 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2913 						qman_base_addr);
2914 		}
2915 
2916 		/* Initializing lower CP for HBM DMA QMAN */
2917 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2918 
2919 		gaudi_init_dma_core(hdev, dma_id);
2920 
2921 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2922 	}
2923 
2924 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2925 }
2926 
2927 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2928 					int qman_id, u64 qman_base_addr)
2929 {
2930 	struct cpu_dyn_regs *dyn_regs =
2931 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2932 	u32 mtr_base_lo, mtr_base_hi;
2933 	u32 so_base_lo, so_base_hi;
2934 	u32 irq_handler_offset;
2935 	u32 q_off, mme_id;
2936 	u32 mme_qm_err_cfg;
2937 
2938 	mtr_base_lo = lower_32_bits(CFG_BASE +
2939 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2940 	mtr_base_hi = upper_32_bits(CFG_BASE +
2941 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2942 	so_base_lo = lower_32_bits(CFG_BASE +
2943 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2944 	so_base_hi = upper_32_bits(CFG_BASE +
2945 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2946 
2947 	q_off = mme_offset + qman_id * 4;
2948 
2949 	if (qman_id < 4) {
2950 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2951 					lower_32_bits(qman_base_addr));
2952 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2953 					upper_32_bits(qman_base_addr));
2954 
2955 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2956 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2957 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2958 
2959 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2960 							QMAN_CPDMA_SIZE_OFFSET);
2961 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2962 							QMAN_CPDMA_SRC_OFFSET);
2963 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2964 							QMAN_CPDMA_DST_OFFSET);
2965 	} else {
2966 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2967 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2968 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2969 
2970 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2971 							QMAN_LDMA_SIZE_OFFSET);
2972 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2973 							QMAN_LDMA_SRC_OFFSET);
2974 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2975 							QMAN_LDMA_DST_OFFSET);
2976 
2977 		/* Configure RAZWI IRQ */
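		/* The offset is first divided by the per-MME QM register
		 * stride to get an MME index, then halved (presumably because
		 * only the master MMEs, 0 and 2, have QMANs), yielding event
		 * index 0 or 1.
		 */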
2978 		mme_id = mme_offset /
2979 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2980 
2981 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2982 		if (hdev->stop_on_err)
2983 			mme_qm_err_cfg |=
2984 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2985 
2986 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2987 
2988 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2989 			lower_32_bits(CFG_BASE + irq_handler_offset));
2990 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2991 			upper_32_bits(CFG_BASE + irq_handler_offset));
2992 
2993 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2994 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2995 									mme_id);
2996 
2997 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2998 				QM_ARB_ERR_MSG_EN_MASK);
2999 
3000 		/* Set timeout to maximum */
3001 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
3002 
3003 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3004 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3005 				QMAN_INTERNAL_MAKE_TRUSTED);
3006 	}
3007 
3008 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3009 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3010 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3011 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3012 }
3013 
3014 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3015 {
3016 	struct gaudi_device *gaudi = hdev->asic_specific;
3017 	struct gaudi_internal_qman_info *q;
3018 	u64 qman_base_addr;
3019 	u32 mme_offset;
3020 	int i, internal_q_index;
3021 
3022 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
3023 		return;
3024 
3025 	/*
3026 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3027 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3028 	 */
3029 
3030 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3031 
3032 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3033 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3034 		q = &gaudi->internal_qmans[internal_q_index];
3035 		qman_base_addr = (u64) q->pq_dma_addr;
3036 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3037 					qman_base_addr);
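		/* After the four streams of the N_W MME (mmMME2_QM) are
		 * configured, switch to offset 0 so the remaining streams go
		 * to the S_W MME (mmMME0_QM), per the mapping comment above.
		 */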
3038 		if (i == 3)
3039 			mme_offset = 0;
3040 	}
3041 
3042 	/* Initializing lower CP for MME QMANs */
3043 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3044 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3045 	gaudi_init_mme_qman(hdev, 0, 4, 0);
3046 
3047 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3048 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3049 
3050 	gaudi->hw_cap_initialized |= HW_CAP_MME;
3051 }
3052 
3053 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3054 				int qman_id, u64 qman_base_addr)
3055 {
3056 	struct cpu_dyn_regs *dyn_regs =
3057 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3058 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3059 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3060 	u32 tpc_qm_err_cfg, irq_handler_offset;
3061 	u32 q_off, tpc_id;
3062 
3063 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3064 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3065 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3066 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3067 	so_base_en_lo = lower_32_bits(CFG_BASE +
3068 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3069 	so_base_en_hi = upper_32_bits(CFG_BASE +
3070 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3071 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3072 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3073 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3074 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3075 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3076 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3077 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3078 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3079 
3080 	q_off = tpc_offset + qman_id * 4;
3081 
3082 	tpc_id = tpc_offset /
3083 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3084 
3085 	if (qman_id < 4) {
3086 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3087 					lower_32_bits(qman_base_addr));
3088 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3089 					upper_32_bits(qman_base_addr));
3090 
3091 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3092 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3093 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3094 
3095 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3096 							QMAN_CPDMA_SIZE_OFFSET);
3097 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3098 							QMAN_CPDMA_SRC_OFFSET);
3099 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3100 							QMAN_CPDMA_DST_OFFSET);
3101 	} else {
3102 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3103 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3104 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3105 
3106 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3107 							QMAN_LDMA_SIZE_OFFSET);
3108 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3109 							QMAN_LDMA_SRC_OFFSET);
3110 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3111 							QMAN_LDMA_DST_OFFSET);
3112 
3113 		/* Configure RAZWI IRQ */
3114 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3115 		if (hdev->stop_on_err)
3116 			tpc_qm_err_cfg |=
3117 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3118 
3119 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3120 
3121 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3122 			lower_32_bits(CFG_BASE + irq_handler_offset));
3123 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3124 			upper_32_bits(CFG_BASE + irq_handler_offset));
3125 
3126 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3127 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3128 									tpc_id);
3129 
3130 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3131 				QM_ARB_ERR_MSG_EN_MASK);
3132 
3133 		/* Set the arbiter watchdog timeout (GAUDI_ARB_WDT_TIMEOUT, ~8 seconds) */
3134 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3135 
3136 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3137 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3138 				QMAN_INTERNAL_MAKE_TRUSTED);
3139 	}
3140 
3141 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3142 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3143 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3144 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3145 
3146 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3147 	if (tpc_id == 6) {
3148 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3149 				mtr_base_ws_lo);
3150 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3151 				mtr_base_ws_hi);
3152 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3153 				so_base_ws_lo);
3154 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3155 				so_base_ws_hi);
3156 	}
3157 }
3158 
3159 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3160 {
3161 	struct gaudi_device *gaudi = hdev->asic_specific;
3162 	struct gaudi_internal_qman_info *q;
3163 	u64 qman_base_addr;
3164 	u32 so_base_hi, tpc_offset = 0;
3165 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3166 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3167 	int i, tpc_id, internal_q_index;
3168 
3169 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3170 		return;
3171 
3172 	so_base_hi = upper_32_bits(CFG_BASE +
3173 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3174 
3175 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3176 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3177 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3178 						tpc_id * QMAN_STREAMS + i;
3179 			q = &gaudi->internal_qmans[internal_q_index];
3180 			qman_base_addr = (u64) q->pq_dma_addr;
3181 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3182 						qman_base_addr);
3183 
3184 			if (i == 3) {
3185 				/* Initializing lower CP for TPC QMAN */
3186 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3187 
3188 				/* Enable the QMAN and TPC channel */
3189 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3190 						QMAN_TPC_ENABLE);
3191 			}
3192 		}
3193 
3194 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3195 				so_base_hi);
3196 
3197 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3198 
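		/* Record this TPC as initialized inside the HW_CAP_TPC_MASK bit-field */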
3199 		gaudi->hw_cap_initialized |=
3200 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3201 	}
3202 }
3203 
3204 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3205 				int qman_id, u64 qman_base_addr, int nic_id)
3206 {
3207 	struct cpu_dyn_regs *dyn_regs =
3208 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3209 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3210 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3211 	u32 nic_qm_err_cfg, irq_handler_offset;
3212 	u32 q_off;
3213 
3214 	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3215 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3216 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3217 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3218 	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3219 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3220 	so_base_en_hi = upper_32_bits(CFG_BASE +
3221 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3222 	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3223 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3224 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3225 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3226 	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3227 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3228 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3229 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3230 
3231 	q_off = nic_offset + qman_id * 4;
3232 
3233 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3234 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3235 
3236 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3237 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3238 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3239 
3240 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3241 							QMAN_LDMA_SIZE_OFFSET);
3242 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3243 							QMAN_LDMA_SRC_OFFSET);
3244 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3245 							QMAN_LDMA_DST_OFFSET);
3246 
3247 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3248 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3249 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3250 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3251 
3252 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3253 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3254 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3255 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3256 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3257 
3258 	if (qman_id == 0) {
3259 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3260 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3261 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3262 
3263 		/* Configure RAZWI IRQ */
3264 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3265 		if (hdev->stop_on_err)
3266 			nic_qm_err_cfg |=
3267 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3268 
3269 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3270 
3271 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3272 			lower_32_bits(CFG_BASE + irq_handler_offset));
3273 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3274 			upper_32_bits(CFG_BASE + irq_handler_offset));
3275 
3276 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3277 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3278 									nic_id);
3279 
3280 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3281 				QM_ARB_ERR_MSG_EN_MASK);
3282 
3283 		/* Set the arbiter watchdog timeout (GAUDI_ARB_WDT_TIMEOUT, ~8 seconds) */
3284 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3285 
3286 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3287 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3288 				QMAN_INTERNAL_MAKE_TRUSTED);
3289 	}
3290 }
3291 
3292 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3293 {
3294 	struct gaudi_device *gaudi = hdev->asic_specific;
3295 	struct gaudi_internal_qman_info *q;
3296 	u64 qman_base_addr;
3297 	u32 nic_offset = 0;
3298 	u32 nic_delta_between_qmans =
3299 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3300 	u32 nic_delta_between_nics =
3301 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3302 	int i, nic_id, internal_q_index;
3303 
3304 	if (!hdev->nic_ports_mask)
3305 		return;
3306 
3307 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3308 		return;
3309 
3310 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3311 
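	/*
	 * Each NIC macro hosts two QMANs (QM0/QM1). After an even nic_id the
	 * offset advances by one QMAN delta (to QM1 of the same macro); after
	 * an odd nic_id it is rewound by two QMAN deltas and advanced by one
	 * macro delta, landing on QM0 of the next NIC macro.
	 */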
3312 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3313 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3314 			nic_offset += nic_delta_between_qmans;
3315 			if (nic_id & 1) {
3316 				nic_offset -= (nic_delta_between_qmans * 2);
3317 				nic_offset += nic_delta_between_nics;
3318 			}
3319 			continue;
3320 		}
3321 
3322 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3323 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3324 						nic_id * QMAN_STREAMS + i;
3325 			q = &gaudi->internal_qmans[internal_q_index];
3326 			qman_base_addr = (u64) q->pq_dma_addr;
3327 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3328 						qman_base_addr, nic_id);
3329 		}
3330 
3331 		/* Enable the QMAN */
3332 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3333 
3334 		nic_offset += nic_delta_between_qmans;
3335 		if (nic_id & 1) {
3336 			nic_offset -= (nic_delta_between_qmans * 2);
3337 			nic_offset += nic_delta_between_nics;
3338 		}
3339 
3340 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3341 	}
3342 }
3343 
3344 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3345 {
3346 	struct gaudi_device *gaudi = hdev->asic_specific;
3347 
3348 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3349 		return;
3350 
3351 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3352 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3353 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3354 }
3355 
3356 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3357 {
3358 	struct gaudi_device *gaudi = hdev->asic_specific;
3359 
3360 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3361 		return;
3362 
3363 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3364 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3365 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3366 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3367 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3368 }
3369 
3370 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3371 {
3372 	struct gaudi_device *gaudi = hdev->asic_specific;
3373 
3374 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3375 		return;
3376 
3377 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3378 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3379 }
3380 
3381 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3382 {
3383 	struct gaudi_device *gaudi = hdev->asic_specific;
3384 	u32 tpc_offset = 0;
3385 	int tpc_id;
3386 
3387 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3388 		return;
3389 
3390 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3391 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3392 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3393 	}
3394 }
3395 
3396 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3397 {
3398 	struct gaudi_device *gaudi = hdev->asic_specific;
3399 	u32 nic_mask, nic_offset = 0;
3400 	u32 nic_delta_between_qmans =
3401 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3402 	u32 nic_delta_between_nics =
3403 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3404 	int nic_id;
3405 
3406 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3407 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3408 
3409 		if (gaudi->hw_cap_initialized & nic_mask)
3410 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3411 
3412 		nic_offset += nic_delta_between_qmans;
3413 		if (nic_id & 1) {
3414 			nic_offset -= (nic_delta_between_qmans * 2);
3415 			nic_offset += nic_delta_between_nics;
3416 		}
3417 	}
3418 }
3419 
3420 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3421 {
3422 	struct gaudi_device *gaudi = hdev->asic_specific;
3423 
3424 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3425 		return;
3426 
3427 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
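	/*
	 * 0xF covers only the four upper (per-stream) CPs; unlike the 0x1F
	 * used for the other engines, the lower CP is not stopped here.
	 */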
3428 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3429 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3430 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3431 }
3432 
3433 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3434 {
3435 	struct gaudi_device *gaudi = hdev->asic_specific;
3436 
3437 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3438 		return;
3439 
3440 	/* Stop CPs of HBM DMA QMANs */
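	/* 0x1F stops all five CPs: the four upper (per-stream) CPs plus the lower CP */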
3441 
3442 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3443 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3444 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3445 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3446 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3447 }
3448 
3449 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3450 {
3451 	struct gaudi_device *gaudi = hdev->asic_specific;
3452 
3453 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3454 		return;
3455 
3456 	/* Stop CPs of MME QMANs */
3457 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3458 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3459 }
3460 
3461 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3462 {
3463 	struct gaudi_device *gaudi = hdev->asic_specific;
3464 
3465 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3466 		return;
3467 
3468 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3469 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3470 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3471 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3472 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3473 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3474 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3475 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3476 }
3477 
3478 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3479 {
3480 	struct gaudi_device *gaudi = hdev->asic_specific;
3481 
3482 	/* Stop upper CPs of QMANs */
3483 
3484 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3485 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3486 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3487 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3488 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3489 
3490 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3491 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3492 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3493 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3494 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3495 
3496 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3497 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3498 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3499 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3500 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3501 
3502 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3503 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3504 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3505 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3506 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3507 
3508 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3509 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3510 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3511 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3512 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3513 
3514 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3515 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3516 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3517 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3518 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3519 
3520 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3521 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3522 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3523 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3524 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3525 
3526 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3527 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3528 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3529 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3530 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3531 
3532 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3533 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3534 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3535 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3536 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3537 
3538 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3539 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3540 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3541 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3542 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3543 }
3544 
3545 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3546 {
3547 	struct gaudi_device *gaudi = hdev->asic_specific;
3548 
3549 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3550 		return;
3551 
3552 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3553 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3554 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3555 }
3556 
3557 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3558 {
3559 	struct gaudi_device *gaudi = hdev->asic_specific;
3560 
3561 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3562 		return;
3563 
3564 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3565 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3566 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3567 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3568 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3569 }
3570 
3571 static void gaudi_mme_stall(struct hl_device *hdev)
3572 {
3573 	struct gaudi_device *gaudi = hdev->asic_specific;
3574 
3575 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3576 		return;
3577 
3578 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3579 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3580 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3581 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3582 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3583 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3584 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3585 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3586 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3587 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3588 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3589 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3590 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3591 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3592 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3593 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3594 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3595 }
3596 
3597 static void gaudi_tpc_stall(struct hl_device *hdev)
3598 {
3599 	struct gaudi_device *gaudi = hdev->asic_specific;
3600 
3601 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3602 		return;
3603 
3604 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3605 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3606 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3607 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3608 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3609 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3610 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3611 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3612 }
3613 
3614 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3615 {
3616 	u32 qman_offset;
3617 	int i;
3618 
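	/*
	 * When FW security is enabled, the clock-gating (CGM) registers are
	 * expected to be blocked from the driver, so their configuration is
	 * left entirely to the firmware.
	 */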
3619 	if (hdev->asic_prop.fw_security_enabled)
3620 		return;
3621 
3622 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3623 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3624 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3625 
3626 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3627 	}
3628 
3629 	WREG32(mmMME0_QM_CGM_CFG, 0);
3630 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3631 	WREG32(mmMME2_QM_CGM_CFG, 0);
3632 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3633 
3634 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3635 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3636 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3637 
3638 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3639 	}
3640 }
3641 
3642 static void gaudi_enable_timestamp(struct hl_device *hdev)
3643 {
3644 	/* Disable the timestamp counter */
3645 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3646 
3647 	/* Zero the lower/upper parts of the 64-bit counter */
3648 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3649 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3650 
3651 	/* Enable the counter */
3652 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3653 }
3654 
3655 static void gaudi_disable_timestamp(struct hl_device *hdev)
3656 {
3657 	/* Disable the timestamp counter */
3658 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3659 }
3660 
3661 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3662 {
3663 	u32 wait_timeout_ms;
3664 
3665 	if (hdev->pldm)
3666 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3667 	else
3668 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3669 
3670 	if (fw_reset)
3671 		goto skip_engines;
3672 
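	/*
	 * Engines are brought down in three passes with a settle delay in
	 * between: stop the QMAN CPs so no new work is fetched, stall the
	 * engine cores themselves, and finally disable the QMANs.
	 */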
3673 	gaudi_stop_nic_qmans(hdev);
3674 	gaudi_stop_mme_qmans(hdev);
3675 	gaudi_stop_tpc_qmans(hdev);
3676 	gaudi_stop_hbm_dma_qmans(hdev);
3677 	gaudi_stop_pci_dma_qmans(hdev);
3678 
3679 	msleep(wait_timeout_ms);
3680 
3681 	gaudi_pci_dma_stall(hdev);
3682 	gaudi_hbm_dma_stall(hdev);
3683 	gaudi_tpc_stall(hdev);
3684 	gaudi_mme_stall(hdev);
3685 
3686 	msleep(wait_timeout_ms);
3687 
3688 	gaudi_disable_nic_qmans(hdev);
3689 	gaudi_disable_mme_qmans(hdev);
3690 	gaudi_disable_tpc_qmans(hdev);
3691 	gaudi_disable_hbm_dma_qmans(hdev);
3692 	gaudi_disable_pci_dma_qmans(hdev);
3693 
3694 	gaudi_disable_timestamp(hdev);
3695 
3696 skip_engines:
3697 	gaudi_disable_msi(hdev);
3698 }
3699 
3700 static int gaudi_mmu_init(struct hl_device *hdev)
3701 {
3702 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3703 	struct gaudi_device *gaudi = hdev->asic_specific;
3704 	u64 hop0_addr;
3705 	int rc, i;
3706 
3707 	if (!hdev->mmu_enable)
3708 		return 0;
3709 
3710 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3711 		return 0;
3712 
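	/* The hop-0 table of ASID i resides at mmu_pgt_addr + i * mmu_hop_table_size */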
3713 	for (i = 0 ; i < prop->max_asid ; i++) {
3714 		hop0_addr = prop->mmu_pgt_addr +
3715 				(i * prop->mmu_hop_table_size);
3716 
3717 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3718 		if (rc) {
3719 			dev_err(hdev->dev,
3720 				"failed to set hop0 addr for asid %d\n", i);
3721 			goto err;
3722 		}
3723 	}
3724 
3725 	/* init MMU cache manage page */
3726 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3727 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3728 
3729 	/* mem cache invalidation */
3730 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3731 
3732 	hl_mmu_invalidate_cache(hdev, true, 0);
3733 
3734 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3735 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3736 
3737 	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3738 
3739 	/*
3740 	 * The H/W expects the first PI after init to be 1. After wraparound
3741 	 * we'll write 0.
3742 	 */
3743 	gaudi->mmu_cache_inv_pi = 1;
3744 
3745 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3746 
3747 	return 0;
3748 
3749 err:
3750 	return rc;
3751 }
3752 
3753 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3754 {
3755 	void __iomem *dst;
3756 
3757 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3758 
3759 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3760 }
3761 
3762 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3763 {
3764 	void __iomem *dst;
3765 
3766 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3767 
3768 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3769 }
3770 
3771 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3772 {
3773 	struct dynamic_fw_load_mgr *dynamic_loader;
3774 	struct cpu_dyn_regs *dyn_regs;
3775 
3776 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3777 
3778 	/*
3779 	 * Here we update initial values for a few specific dynamic regs. Before
3780 	 * the first descriptor is read from the FW, these values have to be
3781 	 * hard-coded. In later stages of the protocol they are updated
3782 	 * automatically by reading the FW descriptor, so the data there is
3783 	 * always up-to-date.
3784 	 */
3785 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3786 	dyn_regs->kmd_msg_to_cpu =
3787 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3788 	dyn_regs->cpu_cmd_status_to_host =
3789 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3790 
3791 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3792 }
3793 
3794 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3795 {
3796 	struct static_fw_load_mgr *static_loader;
3797 
3798 	static_loader = &hdev->fw_loader.static_loader;
3799 
3800 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3801 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3802 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3803 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3804 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3805 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3806 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3807 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3808 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3809 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3810 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3811 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3812 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3813 			GAUDI_PLDM_RESET_WAIT_MSEC :
3814 			GAUDI_CPU_RESET_WAIT_MSEC;
3815 }
3816 
3817 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3818 {
3819 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3820 
3821 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3822 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3823 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3824 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3825 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3826 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3827 }
3828 
3829 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3830 {
3831 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3832 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3833 
3834 	/* fill common fields */
3835 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3836 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3837 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3838 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3839 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3840 	fw_loader->skip_bmc = !hdev->bmc_enable;
3841 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3842 	fw_loader->dram_bar_id = HBM_BAR_ID;
3843 
3844 	if (prop->dynamic_fw_load)
3845 		gaudi_init_dynamic_firmware_loader(hdev);
3846 	else
3847 		gaudi_init_static_firmware_loader(hdev);
3848 }
3849 
3850 static int gaudi_init_cpu(struct hl_device *hdev)
3851 {
3852 	struct gaudi_device *gaudi = hdev->asic_specific;
3853 	int rc;
3854 
3855 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3856 		return 0;
3857 
3858 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3859 		return 0;
3860 
3861 	/*
3862 	 * The device CPU works with 40-bit addresses.
3863 	 * This register sets the extension to 50 bits.
3864 	 */
3865 	if (!hdev->asic_prop.fw_security_enabled)
3866 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3867 
3868 	rc = hl_fw_init_cpu(hdev);
3869 
3870 	if (rc)
3871 		return rc;
3872 
3873 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3874 
3875 	return 0;
3876 }
3877 
3878 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3879 {
3880 	struct cpu_dyn_regs *dyn_regs =
3881 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3882 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3883 	struct gaudi_device *gaudi = hdev->asic_specific;
3884 	u32 status, irq_handler_offset;
3885 	struct hl_eq *eq;
3886 	struct hl_hw_queue *cpu_pq =
3887 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3888 	int err;
3889 
3890 	if (!hdev->cpu_queues_enable)
3891 		return 0;
3892 
3893 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3894 		return 0;
3895 
3896 	eq = &hdev->event_queue;
3897 
3898 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3899 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3900 
3901 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3902 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3903 
3904 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3905 			lower_32_bits(hdev->cpu_accessible_dma_address));
3906 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3907 			upper_32_bits(hdev->cpu_accessible_dma_address));
3908 
3909 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3910 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3911 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3912 
3913 	/* Used for EQ CI */
3914 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3915 
3916 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3917 
3918 	if (gaudi->multi_msi_mode)
3919 		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3920 	else
3921 		WREG32(mmCPU_IF_QUEUE_INIT,
3922 			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3923 
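	/*
	 * READY_FOR_CP tells the device CPU that the host side of the queues is
	 * set up (and whether completions are delivered over multiple MSIs or a
	 * single one). The CPU is expected to answer with
	 * PQ_INIT_STATUS_READY_FOR_HOST, which is polled for below.
	 */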
3924 	irq_handler_offset = prop->gic_interrupts_enable ?
3925 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3926 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3927 
3928 	WREG32(irq_handler_offset,
3929 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3930 
3931 	err = hl_poll_timeout(
3932 		hdev,
3933 		mmCPU_IF_QUEUE_INIT,
3934 		status,
3935 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3936 		1000,
3937 		cpu_timeout);
3938 
3939 	if (err) {
3940 		dev_err(hdev->dev,
3941 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3942 		return -EIO;
3943 	}
3944 
3945 	/* update FW application security bits */
3946 	if (prop->fw_cpu_boot_dev_sts0_valid)
3947 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3948 	if (prop->fw_cpu_boot_dev_sts1_valid)
3949 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3950 
3951 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3952 	return 0;
3953 }
3954 
3955 static void gaudi_pre_hw_init(struct hl_device *hdev)
3956 {
3957 	/* Perform read from the device to make sure device is up */
3958 	RREG32(mmHW_STATE);
3959 
3960 	if (!hdev->asic_prop.fw_security_enabled) {
3961 		/* Set the access through PCI bars (Linux driver only) as
3962 		 * secured
3963 		 */
3964 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3965 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3966 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3967 
3968 		/* Perform read to flush the waiting writes to ensure
3969 		 * configuration was set in the device
3970 		 */
3971 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3972 	}
3973 
3974 	/*
3975 	 * Let's mark in the H/W that we have reached this point. We check
3976 	 * this value in the reset_before_init function to understand whether
3977 	 * we need to reset the chip before doing H/W init. This register is
3978 	 * cleared by the H/W upon H/W reset
3979 	 */
3980 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3981 }
3982 
3983 static int gaudi_hw_init(struct hl_device *hdev)
3984 {
3985 	struct gaudi_device *gaudi = hdev->asic_specific;
3986 	int rc;
3987 
3988 	gaudi_pre_hw_init(hdev);
3989 
3990 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3991 	 * So we set it here, and if anyone later tries to move it to
3992 	 * a different address, there will be an error.
3993 	 */
3994 	if (hdev->asic_prop.iatu_done_by_fw)
3995 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3996 
3997 	/*
3998 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
3999 	 * base address of dram
4000 	 */
4001 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4002 		dev_err(hdev->dev,
4003 			"failed to map HBM bar to DRAM base address\n");
4004 		return -EIO;
4005 	}
4006 
4007 	rc = gaudi_init_cpu(hdev);
4008 	if (rc) {
4009 		dev_err(hdev->dev, "failed to initialize CPU\n");
4010 		return rc;
4011 	}
4012 
4013 	/* In case the clock gating was enabled in preboot we need to disable
4014 	 * it here before touching the MME/TPC registers.
4015 	 */
4016 	gaudi_disable_clock_gating(hdev);
4017 
4018 	/* SRAM scrambler must be initialized after CPU is running from HBM */
4019 	gaudi_init_scrambler_sram(hdev);
4020 
4021 	/* This is here just in case we are working without CPU */
4022 	gaudi_init_scrambler_hbm(hdev);
4023 
4024 	gaudi_init_golden_registers(hdev);
4025 
4026 	rc = gaudi_mmu_init(hdev);
4027 	if (rc)
4028 		return rc;
4029 
4030 	gaudi_init_security(hdev);
4031 
4032 	gaudi_init_pci_dma_qmans(hdev);
4033 
4034 	gaudi_init_hbm_dma_qmans(hdev);
4035 
4036 	gaudi_init_mme_qmans(hdev);
4037 
4038 	gaudi_init_tpc_qmans(hdev);
4039 
4040 	gaudi_init_nic_qmans(hdev);
4041 
4042 	gaudi_enable_timestamp(hdev);
4043 
4044 	/* MSI must be enabled before CPU queues and NIC are initialized */
4045 	rc = gaudi_enable_msi(hdev);
4046 	if (rc)
4047 		goto disable_queues;
4048 
4049 	/* must be called after MSI was enabled */
4050 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4051 	if (rc) {
4052 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4053 			rc);
4054 		goto disable_msi;
4055 	}
4056 
4057 	/* Perform read from the device to flush all configuration */
4058 	RREG32(mmHW_STATE);
4059 
4060 	return 0;
4061 
4062 disable_msi:
4063 	gaudi_disable_msi(hdev);
4064 disable_queues:
4065 	gaudi_disable_mme_qmans(hdev);
4066 	gaudi_disable_pci_dma_qmans(hdev);
4067 
4068 	return rc;
4069 }
4070 
4071 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4072 {
4073 	struct cpu_dyn_regs *dyn_regs =
4074 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4075 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4076 	struct gaudi_device *gaudi = hdev->asic_specific;
4077 	bool driver_performs_reset;
4078 
4079 	if (!hard_reset) {
4080 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4081 		return;
4082 	}
4083 
4084 	if (hdev->pldm) {
4085 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4086 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4087 	} else {
4088 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4089 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4090 	}
4091 
4092 	if (fw_reset) {
4093 		dev_dbg(hdev->dev,
4094 			"Firmware performs HARD reset, going to wait %dms\n",
4095 			reset_timeout_ms);
4096 
4097 		goto skip_reset;
4098 	}
4099 
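	/*
	 * The driver resets the chip itself only when FW security is disabled
	 * and the FW does not perform the hard reset on its own.
	 */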
4100 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4101 					!hdev->asic_prop.hard_reset_done_by_fw);
4102 
4103 	/* Set device to handle FLR by H/W as we will put the device CPU to
4104 	 * halt mode
4105 	 */
4106 	if (driver_performs_reset)
4107 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4108 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4109 
4110 	/* If Linux is loaded in the device CPU, we need to communicate with it
4111 	 * via the GIC. Otherwise, we use the COMMS protocol, or the MSG_TO_CPU
4112 	 * registers in case of old F/Ws.
4113 	 */
4114 	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4115 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4116 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4117 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4118 
4119 		WREG32(irq_handler_offset,
4120 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4121 
4122 		/* This is a hail-mary attempt to revive the card in the small chance that the
4123 		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4124 		 * In that case, triggering reset through GIC won't help. We need to trigger the
4125 		 * reset as if Linux wasn't loaded.
4126 		 *
4127 		 * We do it only if the reset cause was HB, because that would be the indication
4128 		 * of such an event.
4129 		 *
4130 		 * In case watchdog hasn't expired but we still got HB, then this won't do any
4131 		 * damage.
4132 		 */
4133 		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4134 			if (hdev->asic_prop.hard_reset_done_by_fw)
4135 				hl_fw_ask_hard_reset_without_linux(hdev);
4136 			else
4137 				hl_fw_ask_halt_machine_without_linux(hdev);
4138 		}
4139 	} else {
4140 		if (hdev->asic_prop.hard_reset_done_by_fw)
4141 			hl_fw_ask_hard_reset_without_linux(hdev);
4142 		else
4143 			hl_fw_ask_halt_machine_without_linux(hdev);
4144 	}
4145 
4146 	if (driver_performs_reset) {
4147 
4148 		/* Configure the reset registers. Must be done as early as
4149 		 * possible in case we fail during H/W initialization
4150 		 */
4151 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4152 						(CFG_RST_H_DMA_MASK |
4153 						CFG_RST_H_MME_MASK |
4154 						CFG_RST_H_SM_MASK |
4155 						CFG_RST_H_TPC_7_MASK));
4156 
4157 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4158 
4159 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4160 						(CFG_RST_H_HBM_MASK |
4161 						CFG_RST_H_TPC_7_MASK |
4162 						CFG_RST_H_NIC_MASK |
4163 						CFG_RST_H_SM_MASK |
4164 						CFG_RST_H_DMA_MASK |
4165 						CFG_RST_H_MME_MASK |
4166 						CFG_RST_H_CPU_MASK |
4167 						CFG_RST_H_MMU_MASK));
4168 
4169 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4170 						(CFG_RST_L_IF_MASK |
4171 						CFG_RST_L_PSOC_MASK |
4172 						CFG_RST_L_TPC_MASK));
4173 
4174 		msleep(cpu_timeout_ms);
4175 
4176 		/* Tell ASIC not to re-initialize PCIe */
4177 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4178 
4179 		/* Restart BTL/BLR upon hard-reset */
4180 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4181 
4182 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4183 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4184 
4185 		dev_dbg(hdev->dev,
4186 			"Issued HARD reset command, going to wait %dms\n",
4187 			reset_timeout_ms);
4188 	} else {
4189 		dev_dbg(hdev->dev,
4190 			"Firmware performs HARD reset, going to wait %dms\n",
4191 			reset_timeout_ms);
4192 	}
4193 
4194 skip_reset:
4195 	/*
4196 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4197 	 * itself is in reset. Need to wait until the reset is deasserted
4198 	 */
4199 	msleep(reset_timeout_ms);
4200 
4201 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4202 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4203 		dev_err(hdev->dev,
4204 			"Timeout while waiting for device to reset 0x%x\n",
4205 			status);
4206 
4207 	if (gaudi) {
4208 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4209 						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4210 						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4211 						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4212 						HW_CAP_HBM_SCRAMBLER);
4213 
4214 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4215 
4216 		hdev->device_cpu_is_halted = false;
4217 	}
4218 }
4219 
4220 static int gaudi_suspend(struct hl_device *hdev)
4221 {
4222 	int rc;
4223 
4224 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4225 	if (rc)
4226 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4227 
4228 	return rc;
4229 }
4230 
4231 static int gaudi_resume(struct hl_device *hdev)
4232 {
4233 	return gaudi_init_iatu(hdev);
4234 }
4235 
4236 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4237 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4238 {
4239 	int rc;
4240 
4241 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4242 			VM_DONTCOPY | VM_NORESERVE);
4243 
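	/*
	 * The HOST_PHYS_BASE offset added at allocation time is the device-side
	 * view of host memory; strip it before handing the handle back to the
	 * DMA API.
	 */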
4244 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4245 				(dma_addr - HOST_PHYS_BASE), size);
4246 	if (rc)
4247 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4248 
4249 	return rc;
4250 }
4251 
4252 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4253 {
4254 	struct cpu_dyn_regs *dyn_regs =
4255 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4256 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4257 	struct gaudi_device *gaudi = hdev->asic_specific;
4258 	bool invalid_queue = false;
4259 	int dma_id;
4260 
4261 	switch (hw_queue_id) {
4262 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4263 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4264 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4265 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4266 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4267 		break;
4268 
4269 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4270 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4271 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4272 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4273 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4274 		break;
4275 
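	/*
	 * From DMA_2 onwards the queue IDs are shifted by one relative to a
	 * multiple of four (GAUDI_QUEUE_ID_CPU_PQ sits between the DMA_1 and
	 * DMA_2 ranges in the enumeration), hence the "- 1" when extracting
	 * the stream index below.
	 */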
4276 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4277 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4278 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4279 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4280 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4281 		break;
4282 
4283 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4284 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4285 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4286 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4287 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4288 		break;
4289 
4290 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4291 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4292 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4293 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4294 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4295 		break;
4296 
4297 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4298 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4299 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4300 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4301 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4302 		break;
4303 
4304 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4305 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4306 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4307 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4308 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4309 		break;
4310 
4311 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4312 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4313 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4314 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4315 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4316 		break;
4317 
4318 	case GAUDI_QUEUE_ID_CPU_PQ:
4319 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4320 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4321 		else
4322 			invalid_queue = true;
4323 		break;
4324 
4325 	case GAUDI_QUEUE_ID_MME_0_0:
4326 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4327 		break;
4328 
4329 	case GAUDI_QUEUE_ID_MME_0_1:
4330 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4331 		break;
4332 
4333 	case GAUDI_QUEUE_ID_MME_0_2:
4334 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4335 		break;
4336 
4337 	case GAUDI_QUEUE_ID_MME_0_3:
4338 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4339 		break;
4340 
4341 	case GAUDI_QUEUE_ID_MME_1_0:
4342 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4343 		break;
4344 
4345 	case GAUDI_QUEUE_ID_MME_1_1:
4346 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4347 		break;
4348 
4349 	case GAUDI_QUEUE_ID_MME_1_2:
4350 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4351 		break;
4352 
4353 	case GAUDI_QUEUE_ID_MME_1_3:
4354 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4355 		break;
4356 
4357 	case GAUDI_QUEUE_ID_TPC_0_0:
4358 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4359 		break;
4360 
4361 	case GAUDI_QUEUE_ID_TPC_0_1:
4362 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4363 		break;
4364 
4365 	case GAUDI_QUEUE_ID_TPC_0_2:
4366 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4367 		break;
4368 
4369 	case GAUDI_QUEUE_ID_TPC_0_3:
4370 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4371 		break;
4372 
4373 	case GAUDI_QUEUE_ID_TPC_1_0:
4374 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4375 		break;
4376 
4377 	case GAUDI_QUEUE_ID_TPC_1_1:
4378 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4379 		break;
4380 
4381 	case GAUDI_QUEUE_ID_TPC_1_2:
4382 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4383 		break;
4384 
4385 	case GAUDI_QUEUE_ID_TPC_1_3:
4386 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4387 		break;
4388 
4389 	case GAUDI_QUEUE_ID_TPC_2_0:
4390 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4391 		break;
4392 
4393 	case GAUDI_QUEUE_ID_TPC_2_1:
4394 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4395 		break;
4396 
4397 	case GAUDI_QUEUE_ID_TPC_2_2:
4398 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4399 		break;
4400 
4401 	case GAUDI_QUEUE_ID_TPC_2_3:
4402 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4403 		break;
4404 
4405 	case GAUDI_QUEUE_ID_TPC_3_0:
4406 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4407 		break;
4408 
4409 	case GAUDI_QUEUE_ID_TPC_3_1:
4410 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4411 		break;
4412 
4413 	case GAUDI_QUEUE_ID_TPC_3_2:
4414 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4415 		break;
4416 
4417 	case GAUDI_QUEUE_ID_TPC_3_3:
4418 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4419 		break;
4420 
4421 	case GAUDI_QUEUE_ID_TPC_4_0:
4422 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4423 		break;
4424 
4425 	case GAUDI_QUEUE_ID_TPC_4_1:
4426 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4427 		break;
4428 
4429 	case GAUDI_QUEUE_ID_TPC_4_2:
4430 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4431 		break;
4432 
4433 	case GAUDI_QUEUE_ID_TPC_4_3:
4434 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4435 		break;
4436 
4437 	case GAUDI_QUEUE_ID_TPC_5_0:
4438 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4439 		break;
4440 
4441 	case GAUDI_QUEUE_ID_TPC_5_1:
4442 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4443 		break;
4444 
4445 	case GAUDI_QUEUE_ID_TPC_5_2:
4446 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4447 		break;
4448 
4449 	case GAUDI_QUEUE_ID_TPC_5_3:
4450 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4451 		break;
4452 
4453 	case GAUDI_QUEUE_ID_TPC_6_0:
4454 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4455 		break;
4456 
4457 	case GAUDI_QUEUE_ID_TPC_6_1:
4458 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4459 		break;
4460 
4461 	case GAUDI_QUEUE_ID_TPC_6_2:
4462 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4463 		break;
4464 
4465 	case GAUDI_QUEUE_ID_TPC_6_3:
4466 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4467 		break;
4468 
4469 	case GAUDI_QUEUE_ID_TPC_7_0:
4470 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4471 		break;
4472 
4473 	case GAUDI_QUEUE_ID_TPC_7_1:
4474 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4475 		break;
4476 
4477 	case GAUDI_QUEUE_ID_TPC_7_2:
4478 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4479 		break;
4480 
4481 	case GAUDI_QUEUE_ID_TPC_7_3:
4482 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4483 		break;
4484 
4485 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4486 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4487 			invalid_queue = true;
4488 
4489 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4490 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4491 		break;
4492 
4493 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4494 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4495 			invalid_queue = true;
4496 
4497 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4498 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4499 		break;
4500 
4501 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4502 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4503 			invalid_queue = true;
4504 
4505 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4506 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4507 		break;
4508 
4509 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4510 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4511 			invalid_queue = true;
4512 
4513 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4514 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4515 		break;
4516 
4517 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4518 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4519 			invalid_queue = true;
4520 
4521 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4522 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4523 		break;
4524 
4525 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4526 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4527 			invalid_queue = true;
4528 
4529 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4530 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4531 		break;
4532 
4533 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4534 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4535 			invalid_queue = true;
4536 
4537 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4538 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4539 		break;
4540 
4541 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4542 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4543 			invalid_queue = true;
4544 
4545 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4546 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4547 		break;
4548 
4549 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4550 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4551 			invalid_queue = true;
4552 
4553 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4554 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4555 		break;
4556 
4557 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4558 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4559 			invalid_queue = true;
4560 
4561 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4562 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4563 		break;
4564 
4565 	default:
4566 		invalid_queue = true;
4567 	}
4568 
4569 	if (invalid_queue) {
4570 		/* Should never get here */
4571 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4572 			hw_queue_id);
4573 		return;
4574 	}
4575 
4576 	db_value = pi;
4577 
4578 	/* ring the doorbell */
4579 	WREG32(db_reg_offset, db_value);
4580 
4581 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4582 		/* make sure device CPU will read latest data from host */
4583 		mb();
4584 
4585 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4586 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4587 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4588 
4589 		WREG32(irq_handler_offset,
4590 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4591 	}
4592 }
4593 
4594 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4595 				struct hl_bd *bd)
4596 {
4597 	__le64 *pbd = (__le64 *) bd;
4598 
4599 	/* The QMANs are on the host memory so a simple copy suffices */
4600 	pqe[0] = pbd[0];
4601 	pqe[1] = pbd[1];
4602 }
4603 
4604 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4605 					dma_addr_t *dma_handle, gfp_t flags)
4606 {
4607 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4608 						dma_handle, flags);
4609 
4610 	/* Shift to the device's base physical address of host memory */
4611 	if (kernel_addr)
4612 		*dma_handle += HOST_PHYS_BASE;
4613 
4614 	return kernel_addr;
4615 }
4616 
4617 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4618 		void *cpu_addr, dma_addr_t dma_handle)
4619 {
4620 	/* Subtract the device's base physical address of host memory */
4621 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4622 
4623 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4624 }
4625 
4626 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4627 {
4628 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4629 	u64 cur_addr = prop->dram_user_base_address;
4630 	u32 chunk_size, busy;
4631 	int rc, dma_id;
4632 
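	/*
	 * Scrub the DRAM in batches: hand each DMA channel a chunk of up to
	 * 2GB to memset (MEM_SET commit) and then wait for every channel to
	 * go idle before queueing the next batch.
	 */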
4633 	while (cur_addr < prop->dram_end_address) {
4634 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4635 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4636 
4637 			chunk_size =
4638 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4639 
4640 			dev_dbg(hdev->dev,
4641 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4642 				cur_addr, cur_addr + chunk_size);
4643 
4644 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4645 					lower_32_bits(val));
4646 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4647 					upper_32_bits(val));
4648 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4649 						lower_32_bits(cur_addr));
4650 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4651 						upper_32_bits(cur_addr));
4652 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4653 					chunk_size);
4654 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4655 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4656 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4657 
4658 			cur_addr += chunk_size;
4659 
4660 			if (cur_addr == prop->dram_end_address)
4661 				break;
4662 		}
4663 
4664 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4665 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4666 
4667 			rc = hl_poll_timeout(
4668 				hdev,
4669 				mmDMA0_CORE_STS0 + dma_offset,
4670 				busy,
4671 				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4672 				1000,
4673 				HBM_SCRUBBING_TIMEOUT_US);
4674 
4675 			if (rc) {
4676 				dev_err(hdev->dev,
4677 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4678 					dma_id);
4679 				return -EIO;
4680 			}
4681 		}
4682 	}
4683 
4684 	return 0;
4685 }
4686 
4687 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4688 {
4689 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4690 	u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4691 			min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4692 	u64 addr, size, val = hdev->memory_scrub_val;
4693 	ktime_t timeout;
4694 	int rc = 0;
4695 
4696 	if (!hdev->memory_scrub)
4697 		return 0;
4698 
4699 	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4700 	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4701 		if (ktime_compare(ktime_get(), timeout) > 0) {
4702 			dev_err(hdev->dev, "waiting for idle timeout\n");
4703 			return -ETIMEDOUT;
4704 		}
4705 		usleep_range((1000 >> 2) + 1, 1000);
4706 	}
4707 
4708 	/* Scrub SRAM */
4709 	addr = prop->sram_user_base_address;
4710 	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4711 
4712 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4713 			addr, addr + size, val);
4714 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4715 	if (rc) {
4716 		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4717 		return rc;
4718 	}
4719 
4720 	/* Scrub HBM using all DMA channels in parallel */
4721 	rc = gaudi_scrub_device_dram(hdev, val);
4722 	if (rc) {
4723 		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4724 		return rc;
4725 	}
4726 
4727 	return 0;
4728 }
4729 
4730 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4731 				u32 queue_id, dma_addr_t *dma_handle,
4732 				u16 *queue_len)
4733 {
4734 	struct gaudi_device *gaudi = hdev->asic_specific;
4735 	struct gaudi_internal_qman_info *q;
4736 
4737 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4738 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4739 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4740 		return NULL;
4741 	}
4742 
4743 	q = &gaudi->internal_qmans[queue_id];
4744 	*dma_handle = q->pq_dma_addr;
4745 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4746 
4747 	return q->pq_kernel_addr;
4748 }
4749 
4750 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4751 				u16 len, u32 timeout, u64 *result)
4752 {
4753 	struct gaudi_device *gaudi = hdev->asic_specific;
4754 
4755 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4756 		if (result)
4757 			*result = 0;
4758 		return 0;
4759 	}
4760 
4761 	if (!timeout)
4762 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4763 
4764 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4765 						timeout, result);
4766 }
4767 
4768 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4769 {
4770 	struct packet_msg_prot *fence_pkt;
4771 	dma_addr_t pkt_dma_addr;
4772 	u32 fence_val, tmp, timeout_usec;
4773 	dma_addr_t fence_dma_addr;
4774 	u32 *fence_ptr;
4775 	int rc;
4776 
4777 	if (hdev->pldm)
4778 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4779 	else
4780 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4781 
4782 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4783 
4784 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4785 	if (!fence_ptr) {
4786 		dev_err(hdev->dev,
4787 			"Failed to allocate memory for H/W queue %d testing\n",
4788 			hw_queue_id);
4789 		return -ENOMEM;
4790 	}
4791 
4792 	*fence_ptr = 0;
4793 
4794 	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4795 						&pkt_dma_addr);
4796 	if (!fence_pkt) {
4797 		dev_err(hdev->dev,
4798 			"Failed to allocate packet for H/W queue %d testing\n",
4799 			hw_queue_id);
4800 		rc = -ENOMEM;
4801 		goto free_fence_ptr;
4802 	}
4803 
4804 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4805 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4806 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4807 
4808 	fence_pkt->ctl = cpu_to_le32(tmp);
4809 	fence_pkt->value = cpu_to_le32(fence_val);
4810 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4811 
4812 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4813 					sizeof(struct packet_msg_prot),
4814 					pkt_dma_addr);
4815 	if (rc) {
4816 		dev_err(hdev->dev,
4817 			"Failed to send fence packet to H/W queue %d\n",
4818 			hw_queue_id);
4819 		goto free_pkt;
4820 	}
4821 
4822 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4823 					1000, timeout_usec, true);
4824 
4825 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4826 
4827 	if (rc == -ETIMEDOUT) {
4828 		dev_err(hdev->dev,
4829 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4830 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4831 		rc = -EIO;
4832 	}
4833 
4834 free_pkt:
4835 	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4836 free_fence_ptr:
4837 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4838 	return rc;
4839 }
4840 
4841 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4842 {
4843 	struct gaudi_device *gaudi = hdev->asic_specific;
4844 
4845 	/*
4846 	 * check capability here as send_cpu_message() won't update the result
4847 	 * value if no capability
4848 	 */
4849 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4850 		return 0;
4851 
4852 	return hl_fw_test_cpu_queue(hdev);
4853 }
4854 
4855 static int gaudi_test_queues(struct hl_device *hdev)
4856 {
4857 	int i, rc, ret_val = 0;
4858 
4859 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4860 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4861 			rc = gaudi_test_queue(hdev, i);
4862 			if (rc)
4863 				ret_val = -EINVAL;
4864 		}
4865 	}
4866 
4867 	rc = gaudi_test_cpu_queue(hdev);
4868 	if (rc)
4869 		ret_val = -EINVAL;
4870 
4871 	return ret_val;
4872 }
4873 
4874 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4875 		gfp_t mem_flags, dma_addr_t *dma_handle)
4876 {
4877 	void *kernel_addr;
4878 
4879 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4880 		return NULL;
4881 
4882 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4883 
4884 	/* Shift to the device's base physical address of host memory */
4885 	if (kernel_addr)
4886 		*dma_handle += HOST_PHYS_BASE;
4887 
4888 	return kernel_addr;
4889 }
4890 
4891 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4892 			dma_addr_t dma_addr)
4893 {
4894 	/* Cancel the device's base physical address of host memory */
4895 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4896 
4897 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4898 }
4899 
4900 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4901 					size_t size, dma_addr_t *dma_handle)
4902 {
4903 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4904 }
4905 
4906 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4907 						size_t size, void *vaddr)
4908 {
4909 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4910 }
4911 
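/*
 * Walk the SG table, merging entries whose DMA addresses are contiguous up to
 * DMA_MAX_TRANSFER_SIZE, and return the total size in bytes of the LIN_DMA
 * packets needed to describe the list in the patched CB.
 */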
4912 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4913 {
4914 	struct scatterlist *sg, *sg_next_iter;
4915 	u32 count, dma_desc_cnt;
4916 	u64 len, len_next;
4917 	dma_addr_t addr, addr_next;
4918 
4919 	dma_desc_cnt = 0;
4920 
4921 	for_each_sgtable_dma_sg(sgt, sg, count) {
4922 		len = sg_dma_len(sg);
4923 		addr = sg_dma_address(sg);
4924 
4925 		if (len == 0)
4926 			break;
4927 
4928 		while ((count + 1) < sgt->nents) {
4929 			sg_next_iter = sg_next(sg);
4930 			len_next = sg_dma_len(sg_next_iter);
4931 			addr_next = sg_dma_address(sg_next_iter);
4932 
4933 			if (len_next == 0)
4934 				break;
4935 
4936 			if ((addr + len == addr_next) &&
4937 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4938 				len += len_next;
4939 				count++;
4940 				sg = sg_next_iter;
4941 			} else {
4942 				break;
4943 			}
4944 		}
4945 
4946 		dma_desc_cnt++;
4947 	}
4948 
4949 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4950 }
4951 
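/*
 * Pin and DMA-map the host memory referenced by a user LIN_DMA packet (unless
 * it is already pinned for this job) and add the resulting descriptor list
 * size to the patched CB size.
 */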
4952 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4953 				struct hl_cs_parser *parser,
4954 				struct packet_lin_dma *user_dma_pkt,
4955 				u64 addr, enum dma_data_direction dir)
4956 {
4957 	struct hl_userptr *userptr;
4958 	int rc;
4959 
4960 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4961 			parser->job_userptr_list, &userptr))
4962 		goto already_pinned;
4963 
4964 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4965 	if (!userptr)
4966 		return -ENOMEM;
4967 
4968 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4969 				userptr);
4970 	if (rc)
4971 		goto free_userptr;
4972 
4973 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4974 
4975 	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4976 	if (rc) {
4977 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4978 		goto unpin_memory;
4979 	}
4980 
4981 	userptr->dma_mapped = true;
4982 	userptr->dir = dir;
4983 
4984 already_pinned:
4985 	parser->patched_cb_size +=
4986 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4987 
4988 	return 0;
4989 
4990 unpin_memory:
4991 	list_del(&userptr->job_node);
4992 	hl_unpin_host_memory(hdev, userptr);
4993 free_userptr:
4994 	kfree(userptr);
4995 	return rc;
4996 }
4997 
4998 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4999 				struct hl_cs_parser *parser,
5000 				struct packet_lin_dma *user_dma_pkt,
5001 				bool src_in_host)
5002 {
5003 	enum dma_data_direction dir;
5004 	bool skip_host_mem_pin = false, user_memset;
5005 	u64 addr;
5006 	int rc = 0;
5007 
5008 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5009 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5010 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5011 
5012 	if (src_in_host) {
5013 		if (user_memset)
5014 			skip_host_mem_pin = true;
5015 
5016 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5017 		dir = DMA_TO_DEVICE;
5018 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5019 	} else {
5020 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5021 		dir = DMA_FROM_DEVICE;
5022 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5023 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5024 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5025 	}
5026 
5027 	if (skip_host_mem_pin)
5028 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5029 	else
5030 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5031 						addr, dir);
5032 
5033 	return rc;
5034 }
5035 
5036 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5037 				struct hl_cs_parser *parser,
5038 				struct packet_lin_dma *user_dma_pkt)
5039 {
5040 	bool src_in_host = false;
5041 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5042 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5043 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5044 
5045 	dev_dbg(hdev->dev, "DMA packet details:\n");
5046 	dev_dbg(hdev->dev, "source == 0x%llx\n",
5047 				le64_to_cpu(user_dma_pkt->src_addr));
5048 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5049 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5050 
5051 	/*
5052 	 * Special handling for DMA with size 0. Bypass all validations
5053 	 * because no transactions will be done except for WR_COMP, which
5054 	 * is not a security issue
5055 	 */
5056 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
5057 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5058 		return 0;
5059 	}
5060 
5061 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5062 		src_in_host = true;
5063 
5064 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5065 						src_in_host);
5066 }
5067 
5068 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5069 					struct hl_cs_parser *parser,
5070 					struct packet_load_and_exe *user_pkt)
5071 {
5072 	u32 cfg;
5073 
5074 	cfg = le32_to_cpu(user_pkt->cfg);
5075 
5076 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5077 		dev_err(hdev->dev,
5078 			"User not allowed to use Load and Execute\n");
5079 		return -EPERM;
5080 	}
5081 
5082 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5083 
5084 	return 0;
5085 }
5086 
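/*
 * Scan the user CB packet by packet: reject packets the user is not allowed
 * to submit, validate DMA packets (pinning host memory when the MMU is not
 * used) and accumulate the size needed for the patched CB.
 */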
5087 static int gaudi_validate_cb(struct hl_device *hdev,
5088 			struct hl_cs_parser *parser, bool is_mmu)
5089 {
5090 	u32 cb_parsed_length = 0;
5091 	int rc = 0;
5092 
5093 	parser->patched_cb_size = 0;
5094 
	/* user_cb_size is more than 0 so the loop will always be executed */
5096 	while (cb_parsed_length < parser->user_cb_size) {
5097 		enum packet_id pkt_id;
5098 		u16 pkt_size;
5099 		struct gaudi_packet *user_pkt;
5100 
5101 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5102 
5103 		pkt_id = (enum packet_id) (
5104 				(le64_to_cpu(user_pkt->header) &
5105 				PACKET_HEADER_PACKET_ID_MASK) >>
5106 					PACKET_HEADER_PACKET_ID_SHIFT);
5107 
5108 		if (!validate_packet_id(pkt_id)) {
5109 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5110 			rc = -EINVAL;
5111 			break;
5112 		}
5113 
5114 		pkt_size = gaudi_packet_sizes[pkt_id];
5115 		cb_parsed_length += pkt_size;
5116 		if (cb_parsed_length > parser->user_cb_size) {
5117 			dev_err(hdev->dev,
5118 				"packet 0x%x is out of CB boundary\n", pkt_id);
5119 			rc = -EINVAL;
5120 			break;
5121 		}
5122 
5123 		switch (pkt_id) {
5124 		case PACKET_MSG_PROT:
5125 			dev_err(hdev->dev,
5126 				"User not allowed to use MSG_PROT\n");
5127 			rc = -EPERM;
5128 			break;
5129 
5130 		case PACKET_CP_DMA:
5131 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5132 			rc = -EPERM;
5133 			break;
5134 
5135 		case PACKET_STOP:
5136 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5137 			rc = -EPERM;
5138 			break;
5139 
5140 		case PACKET_WREG_BULK:
5141 			dev_err(hdev->dev,
5142 				"User not allowed to use WREG_BULK\n");
5143 			rc = -EPERM;
5144 			break;
5145 
5146 		case PACKET_LOAD_AND_EXE:
5147 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5148 				(struct packet_load_and_exe *) user_pkt);
5149 			break;
5150 
5151 		case PACKET_LIN_DMA:
5152 			parser->contains_dma_pkt = true;
5153 			if (is_mmu)
5154 				parser->patched_cb_size += pkt_size;
5155 			else
5156 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5157 					(struct packet_lin_dma *) user_pkt);
5158 			break;
5159 
5160 		case PACKET_WREG_32:
5161 		case PACKET_MSG_LONG:
5162 		case PACKET_MSG_SHORT:
5163 		case PACKET_REPEAT:
5164 		case PACKET_FENCE:
5165 		case PACKET_NOP:
5166 		case PACKET_ARB_POINT:
5167 			parser->patched_cb_size += pkt_size;
5168 			break;
5169 
5170 		default:
5171 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5172 				pkt_id);
5173 			rc = -EINVAL;
5174 			break;
5175 		}
5176 
5177 		if (rc)
5178 			break;
5179 	}
5180 
5181 	/*
5182 	 * The new CB should have space at the end for two MSG_PROT packets:
5183 	 * 1. Optional NOP padding for cacheline alignment
5184 	 * 2. A packet that will act as a completion packet
5185 	 * 3. A packet that will generate MSI interrupt
5186 	 */
5187 	if (parser->completion)
5188 		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5189 			parser->patched_cb_size);
5190 
5191 	return rc;
5192 }
5193 
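/*
 * Translate a single user LIN_DMA packet into one or more LIN_DMA packets
 * that target the DMA addresses of the pinned SG list. WR_COMP is re-enabled
 * only on the last generated packet, as the user originally requested.
 */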
5194 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5195 				struct hl_cs_parser *parser,
5196 				struct packet_lin_dma *user_dma_pkt,
5197 				struct packet_lin_dma *new_dma_pkt,
5198 				u32 *new_dma_pkt_size)
5199 {
5200 	struct hl_userptr *userptr;
5201 	struct scatterlist *sg, *sg_next_iter;
5202 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5203 	u64 len, len_next;
5204 	dma_addr_t dma_addr, dma_addr_next;
5205 	u64 device_memory_addr, addr;
5206 	enum dma_data_direction dir;
5207 	struct sg_table *sgt;
5208 	bool src_in_host = false;
5209 	bool skip_host_mem_pin = false;
5210 	bool user_memset;
5211 
5212 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5213 
5214 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5215 		src_in_host = true;
5216 
5217 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5218 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5219 
5220 	if (src_in_host) {
5221 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5222 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5223 		dir = DMA_TO_DEVICE;
5224 		if (user_memset)
5225 			skip_host_mem_pin = true;
5226 	} else {
5227 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5228 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5229 		dir = DMA_FROM_DEVICE;
5230 	}
5231 
5232 	if ((!skip_host_mem_pin) &&
5233 		(!hl_userptr_is_pinned(hdev, addr,
5234 					le32_to_cpu(user_dma_pkt->tsize),
5235 					parser->job_userptr_list, &userptr))) {
5236 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5237 				addr, user_dma_pkt->tsize);
5238 		return -EFAULT;
5239 	}
5240 
5241 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5242 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5243 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5244 		return 0;
5245 	}
5246 
5247 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5248 
5249 	sgt = userptr->sgt;
5250 	dma_desc_cnt = 0;
5251 
5252 	for_each_sgtable_dma_sg(sgt, sg, count) {
5253 		len = sg_dma_len(sg);
5254 		dma_addr = sg_dma_address(sg);
5255 
5256 		if (len == 0)
5257 			break;
5258 
5259 		while ((count + 1) < sgt->nents) {
5260 			sg_next_iter = sg_next(sg);
5261 			len_next = sg_dma_len(sg_next_iter);
5262 			dma_addr_next = sg_dma_address(sg_next_iter);
5263 
5264 			if (len_next == 0)
5265 				break;
5266 
5267 			if ((dma_addr + len == dma_addr_next) &&
5268 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5269 				len += len_next;
5270 				count++;
5271 				sg = sg_next_iter;
5272 			} else {
5273 				break;
5274 			}
5275 		}
5276 
5277 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5278 		if (likely(dma_desc_cnt))
5279 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5280 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5281 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5282 		new_dma_pkt->tsize = cpu_to_le32(len);
5283 
5284 		if (dir == DMA_TO_DEVICE) {
5285 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5286 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5287 		} else {
5288 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5289 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5290 		}
5291 
5292 		if (!user_memset)
5293 			device_memory_addr += len;
5294 		dma_desc_cnt++;
5295 		new_dma_pkt++;
5296 	}
5297 
5298 	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"No SG entries to patch in DMA packet\n");
5301 		return -EFAULT;
5302 	}
5303 
5304 	/* Fix the last dma packet - wrcomp must be as user set it */
5305 	new_dma_pkt--;
5306 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5307 
5308 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5309 
5310 	return 0;
5311 }
5312 
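/*
 * Copy the user CB into the patched CB, expanding LIN_DMA packets via
 * gaudi_patch_dma_packet() and rejecting packets the user may not submit.
 */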
5313 static int gaudi_patch_cb(struct hl_device *hdev,
5314 				struct hl_cs_parser *parser)
5315 {
5316 	u32 cb_parsed_length = 0;
5317 	u32 cb_patched_cur_length = 0;
5318 	int rc = 0;
5319 
	/* user_cb_size is more than 0 so the loop will always be executed */
5321 	while (cb_parsed_length < parser->user_cb_size) {
5322 		enum packet_id pkt_id;
5323 		u16 pkt_size;
5324 		u32 new_pkt_size = 0;
5325 		struct gaudi_packet *user_pkt, *kernel_pkt;
5326 
5327 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5328 		kernel_pkt = parser->patched_cb->kernel_address +
5329 					cb_patched_cur_length;
5330 
5331 		pkt_id = (enum packet_id) (
5332 				(le64_to_cpu(user_pkt->header) &
5333 				PACKET_HEADER_PACKET_ID_MASK) >>
5334 					PACKET_HEADER_PACKET_ID_SHIFT);
5335 
5336 		if (!validate_packet_id(pkt_id)) {
5337 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5338 			rc = -EINVAL;
5339 			break;
5340 		}
5341 
5342 		pkt_size = gaudi_packet_sizes[pkt_id];
5343 		cb_parsed_length += pkt_size;
5344 		if (cb_parsed_length > parser->user_cb_size) {
5345 			dev_err(hdev->dev,
5346 				"packet 0x%x is out of CB boundary\n", pkt_id);
5347 			rc = -EINVAL;
5348 			break;
5349 		}
5350 
5351 		switch (pkt_id) {
5352 		case PACKET_LIN_DMA:
5353 			rc = gaudi_patch_dma_packet(hdev, parser,
5354 					(struct packet_lin_dma *) user_pkt,
5355 					(struct packet_lin_dma *) kernel_pkt,
5356 					&new_pkt_size);
5357 			cb_patched_cur_length += new_pkt_size;
5358 			break;
5359 
5360 		case PACKET_MSG_PROT:
5361 			dev_err(hdev->dev,
5362 				"User not allowed to use MSG_PROT\n");
5363 			rc = -EPERM;
5364 			break;
5365 
5366 		case PACKET_CP_DMA:
5367 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5368 			rc = -EPERM;
5369 			break;
5370 
5371 		case PACKET_STOP:
5372 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5373 			rc = -EPERM;
5374 			break;
5375 
5376 		case PACKET_WREG_32:
5377 		case PACKET_WREG_BULK:
5378 		case PACKET_MSG_LONG:
5379 		case PACKET_MSG_SHORT:
5380 		case PACKET_REPEAT:
5381 		case PACKET_FENCE:
5382 		case PACKET_NOP:
5383 		case PACKET_ARB_POINT:
5384 		case PACKET_LOAD_AND_EXE:
5385 			memcpy(kernel_pkt, user_pkt, pkt_size);
5386 			cb_patched_cur_length += pkt_size;
5387 			break;
5388 
5389 		default:
5390 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5391 				pkt_id);
5392 			rc = -EINVAL;
5393 			break;
5394 		}
5395 
5396 		if (rc)
5397 			break;
5398 	}
5399 
5400 	return rc;
5401 }
5402 
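/*
 * CS parsing when the MMU is enabled: the user CB is copied as-is into a
 * kernel-allocated patched CB (addresses are device virtual addresses) and is
 * then only validated, without patching the DMA packets.
 */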
5403 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5404 		struct hl_cs_parser *parser)
5405 {
5406 	u64 handle;
5407 	u32 patched_cb_size;
5408 	struct hl_cb *user_cb;
5409 	int rc;
5410 
5411 	/*
5412 	 * The new CB should have space at the end for two MSG_PROT packets:
5413 	 * 1. Optional NOP padding for cacheline alignment
5414 	 * 2. A packet that will act as a completion packet
5415 	 * 3. A packet that will generate MSI interrupt
5416 	 */
5417 	if (parser->completion)
5418 		parser->patched_cb_size = parser->user_cb_size +
5419 				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5420 	else
5421 		parser->patched_cb_size = parser->user_cb_size;
5422 
5423 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5424 				parser->patched_cb_size, false, false,
5425 				&handle);
5426 
5427 	if (rc) {
5428 		dev_err(hdev->dev,
5429 			"Failed to allocate patched CB for DMA CS %d\n",
5430 			rc);
5431 		return rc;
5432 	}
5433 
5434 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5435 	/* hl_cb_get should never fail */
5436 	if (!parser->patched_cb) {
5437 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5438 		rc = -EFAULT;
5439 		goto out;
5440 	}
5441 
5442 	/*
5443 	 * We are protected from overflow because the check
5444 	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5445 	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5446 	 *
5447 	 * There is no option to reach here without going through that check because:
5448 	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5449 	 *    an external queue.
5450 	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5451 	 */
5452 	memcpy(parser->patched_cb->kernel_address,
5453 		parser->user_cb->kernel_address,
5454 		parser->user_cb_size);
5455 
5456 	patched_cb_size = parser->patched_cb_size;
5457 
5458 	/* Validate patched CB instead of user CB */
5459 	user_cb = parser->user_cb;
5460 	parser->user_cb = parser->patched_cb;
5461 	rc = gaudi_validate_cb(hdev, parser, true);
5462 	parser->user_cb = user_cb;
5463 
5464 	if (rc) {
5465 		hl_cb_put(parser->patched_cb);
5466 		goto out;
5467 	}
5468 
5469 	if (patched_cb_size != parser->patched_cb_size) {
5470 		dev_err(hdev->dev, "user CB size mismatch\n");
5471 		hl_cb_put(parser->patched_cb);
5472 		rc = -EINVAL;
5473 		goto out;
5474 	}
5475 
5476 out:
5477 	/*
5478 	 * Always call cb destroy here because we still have 1 reference
5479 	 * to it by calling cb_get earlier. After the job will be completed,
5480 	 * cb_put will release it, but here we want to remove it from the
5481 	 * idr
5482 	 */
5483 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5484 
5485 	return rc;
5486 }
5487 
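/*
 * CS parsing when the MMU is disabled: validate the user CB, allocate a
 * patched CB and rewrite the DMA packets to use the pinned host addresses.
 */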
5488 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5489 		struct hl_cs_parser *parser)
5490 {
5491 	u64 handle;
5492 	int rc;
5493 
5494 	rc = gaudi_validate_cb(hdev, parser, false);
5495 
5496 	if (rc)
5497 		goto free_userptr;
5498 
5499 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5500 				parser->patched_cb_size, false, false,
5501 				&handle);
5502 	if (rc) {
5503 		dev_err(hdev->dev,
5504 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5505 		goto free_userptr;
5506 	}
5507 
5508 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5509 	/* hl_cb_get should never fail here */
5510 	if (!parser->patched_cb) {
5511 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5512 		rc = -EFAULT;
5513 		goto out;
5514 	}
5515 
5516 	rc = gaudi_patch_cb(hdev, parser);
5517 
5518 	if (rc)
5519 		hl_cb_put(parser->patched_cb);
5520 
5521 out:
5522 	/*
5523 	 * Always call cb destroy here because we still have 1 reference
5524 	 * to it by calling cb_get earlier. After the job will be completed,
5525 	 * cb_put will release it, but here we want to remove it from the
5526 	 * idr
5527 	 */
5528 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5529 
5530 free_userptr:
5531 	if (rc)
5532 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5533 	return rc;
5534 }
5535 
5536 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5537 					struct hl_cs_parser *parser)
5538 {
5539 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5540 	struct gaudi_device *gaudi = hdev->asic_specific;
5541 	u32 nic_queue_offset, nic_mask_q_id;
5542 
5543 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5544 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5545 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5546 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5547 
5548 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5549 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5550 			return -EINVAL;
5551 		}
5552 	}
5553 
5554 	/* For internal queue jobs just check if CB address is valid */
5555 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5556 					parser->user_cb_size,
5557 					asic_prop->sram_user_base_address,
5558 					asic_prop->sram_end_address))
5559 		return 0;
5560 
5561 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5562 					parser->user_cb_size,
5563 					asic_prop->dram_user_base_address,
5564 					asic_prop->dram_end_address))
5565 		return 0;
5566 
5567 	/* PMMU and HPMMU addresses are equal, check only one of them */
5568 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5569 					parser->user_cb_size,
5570 					asic_prop->pmmu.start_addr,
5571 					asic_prop->pmmu.end_addr))
5572 		return 0;
5573 
5574 	dev_err(hdev->dev,
5575 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5576 		parser->user_cb, parser->user_cb_size);
5577 
5578 	return -EFAULT;
5579 }
5580 
5581 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5582 {
5583 	struct gaudi_device *gaudi = hdev->asic_specific;
5584 
5585 	if (parser->queue_type == QUEUE_TYPE_INT)
5586 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5587 
5588 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5589 		return gaudi_parse_cb_mmu(hdev, parser);
5590 	else
5591 		return gaudi_parse_cb_no_mmu(hdev, parser);
5592 }
5593 
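/*
 * Terminate a patched CB: pad with NOP packets up to the two trailing
 * MSG_PROT packets, the first writing the completion value to the CQ and the
 * second generating the MSI interrupt.
 */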
5594 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5595 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5596 				u32 msi_vec, bool eb)
5597 {
5598 	struct gaudi_device *gaudi = hdev->asic_specific;
5599 	struct packet_msg_prot *cq_pkt;
5600 	struct packet_nop *cq_padding;
5601 	u64 msi_addr;
5602 	u32 tmp;
5603 
5604 	cq_padding = kernel_address + original_len;
5605 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5606 
5607 	while ((void *)cq_padding < (void *)cq_pkt) {
5608 		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5609 		cq_padding++;
5610 	}
5611 
5612 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5613 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5614 
5615 	if (eb)
5616 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5617 
5618 	cq_pkt->ctl = cpu_to_le32(tmp);
5619 	cq_pkt->value = cpu_to_le32(cq_val);
5620 	cq_pkt->addr = cpu_to_le64(cq_addr);
5621 
5622 	cq_pkt++;
5623 
5624 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5625 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5626 	cq_pkt->ctl = cpu_to_le32(tmp);
5627 	cq_pkt->value = cpu_to_le32(1);
5628 
5629 	if (gaudi->multi_msi_mode)
5630 		msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5631 	else
5632 		msi_addr = mmPCIE_CORE_MSI_REQ;
5633 
5634 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5635 }
5636 
5637 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5638 {
5639 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5640 }
5641 
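/*
 * Fill a device memory range with a 64-bit value by building a single memset
 * LIN_DMA packet in a kernel CB and sending it on QMAN0 (DMA channel 0).
 */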
5642 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5643 					u32 size, u64 val)
5644 {
5645 	struct packet_lin_dma *lin_dma_pkt;
5646 	struct hl_cs_job *job;
5647 	u32 cb_size, ctl, err_cause;
5648 	struct hl_cb *cb;
5649 	int rc;
5650 
5651 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5652 	if (!cb)
5653 		return -EFAULT;
5654 
5655 	lin_dma_pkt = cb->kernel_address;
5656 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5657 	cb_size = sizeof(*lin_dma_pkt);
5658 
5659 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5660 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5661 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5662 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5663 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5664 
5665 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5666 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5667 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5668 	lin_dma_pkt->tsize = cpu_to_le32(size);
5669 
5670 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5671 	if (!job) {
5672 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5673 		rc = -ENOMEM;
5674 		goto release_cb;
5675 	}
5676 
5677 	/* Verify DMA is OK */
5678 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5679 	if (err_cause && !hdev->init_done) {
5680 		dev_dbg(hdev->dev,
5681 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5682 			err_cause);
5683 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5684 	}
5685 
5686 	job->id = 0;
5687 	job->user_cb = cb;
5688 	atomic_inc(&job->user_cb->cs_cnt);
5689 	job->user_cb_size = cb_size;
5690 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5691 	job->patched_cb = job->user_cb;
5692 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5693 
5694 	hl_debugfs_add_job(hdev, job);
5695 
5696 	rc = gaudi_send_job_on_qman0(hdev, job);
5697 	hl_debugfs_remove_job(hdev, job);
5698 	kfree(job);
5699 	atomic_dec(&cb->cs_cnt);
5700 
5701 	/* Verify DMA is OK */
5702 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5703 	if (err_cause) {
5704 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5705 		rc = -EIO;
5706 		if (!hdev->init_done) {
5707 			dev_dbg(hdev->dev,
5708 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5709 				err_cause);
5710 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5711 		}
5712 	}
5713 
5714 release_cb:
5715 	hl_cb_put(cb);
5716 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5717 
5718 	return rc;
5719 }
5720 
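/*
 * Write the same value to num_regs consecutive registers starting at reg_base
 * by sending a CB of MSG_LONG packets on QMAN0.
 */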
5721 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5722 					u32 num_regs, u32 val)
5723 {
5724 	struct packet_msg_long *pkt;
5725 	struct hl_cs_job *job;
5726 	u32 cb_size, ctl;
5727 	struct hl_cb *cb;
5728 	int i, rc;
5729 
5730 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5731 
5732 	if (cb_size > SZ_2M) {
5733 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5734 		return -ENOMEM;
5735 	}
5736 
5737 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5738 	if (!cb)
5739 		return -EFAULT;
5740 
5741 	pkt = cb->kernel_address;
5742 
5743 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5744 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5745 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5746 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5747 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5748 
5749 	for (i = 0; i < num_regs ; i++, pkt++) {
5750 		pkt->ctl = cpu_to_le32(ctl);
5751 		pkt->value = cpu_to_le32(val);
5752 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5753 	}
5754 
5755 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5756 	if (!job) {
5757 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5758 		rc = -ENOMEM;
5759 		goto release_cb;
5760 	}
5761 
5762 	job->id = 0;
5763 	job->user_cb = cb;
5764 	atomic_inc(&job->user_cb->cs_cnt);
5765 	job->user_cb_size = cb_size;
5766 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5767 	job->patched_cb = job->user_cb;
5768 	job->job_cb_size = cb_size;
5769 
5770 	hl_debugfs_add_job(hdev, job);
5771 
5772 	rc = gaudi_send_job_on_qman0(hdev, job);
5773 	hl_debugfs_remove_job(hdev, job);
5774 	kfree(job);
5775 	atomic_dec(&cb->cs_cnt);
5776 
5777 release_cb:
5778 	hl_cb_put(cb);
5779 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5780 
5781 	return rc;
5782 }
5783 
5784 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5785 {
5786 	u64 base_addr;
5787 	u32 num_regs;
5788 	int rc;
5789 
5790 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5791 	num_regs = NUM_OF_SOB_IN_BLOCK;
5792 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5793 	if (rc) {
5794 		dev_err(hdev->dev, "failed resetting SM registers");
5795 		return -ENOMEM;
5796 	}
5797 
5798 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5799 	num_regs = NUM_OF_SOB_IN_BLOCK;
5800 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5801 	if (rc) {
5802 		dev_err(hdev->dev, "failed resetting SM registers");
5803 		return -ENOMEM;
5804 	}
5805 
5806 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5807 	num_regs = NUM_OF_SOB_IN_BLOCK;
5808 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5809 	if (rc) {
5810 		dev_err(hdev->dev, "failed resetting SM registers");
5811 		return -ENOMEM;
5812 	}
5813 
5814 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5815 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5816 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5817 	if (rc) {
5818 		dev_err(hdev->dev, "failed resetting SM registers");
5819 		return -ENOMEM;
5820 	}
5821 
5822 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5823 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5824 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5825 	if (rc) {
5826 		dev_err(hdev->dev, "failed resetting SM registers");
5827 		return -ENOMEM;
5828 	}
5829 
5830 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5831 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5832 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5833 	if (rc) {
5834 		dev_err(hdev->dev, "failed resetting SM registers");
5835 		return -ENOMEM;
5836 	}
5837 
5838 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5839 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5840 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5841 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5842 	if (rc) {
5843 		dev_err(hdev->dev, "failed resetting SM registers");
5844 		return -ENOMEM;
5845 	}
5846 
5847 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5848 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5849 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5850 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5851 	if (rc) {
5852 		dev_err(hdev->dev, "failed resetting SM registers");
5853 		return -ENOMEM;
5854 	}
5855 
5856 	return 0;
5857 }
5858 
5859 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5860 {
5861 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5862 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5863 	int i;
5864 
5865 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5866 		u64 sob_addr = CFG_BASE +
5867 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5868 				(i * sob_delta);
5869 		u32 dma_offset = i * DMA_CORE_OFFSET;
5870 
5871 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5872 				lower_32_bits(sob_addr));
5873 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5874 				upper_32_bits(sob_addr));
5875 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5876 
5877 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5878 		 * modified by the user for SRAM reduction
5879 		 */
5880 		if (i > 1)
5881 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5882 								0x00000001);
5883 	}
5884 }
5885 
5886 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5887 {
5888 	u32 qman_offset;
5889 	int i;
5890 
5891 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5892 		qman_offset = i * DMA_QMAN_OFFSET;
5893 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5894 	}
5895 
5896 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5897 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5898 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5899 	}
5900 
5901 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5902 		qman_offset = i * TPC_QMAN_OFFSET;
5903 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5904 	}
5905 
5906 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5907 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5908 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5909 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5910 	}
5911 }
5912 
5913 static int gaudi_restore_user_registers(struct hl_device *hdev)
5914 {
5915 	int rc;
5916 
5917 	rc = gaudi_restore_sm_registers(hdev);
5918 	if (rc)
5919 		return rc;
5920 
5921 	gaudi_restore_dma_registers(hdev);
5922 	gaudi_restore_qm_registers(hdev);
5923 
5924 	return 0;
5925 }
5926 
5927 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5928 {
5929 	return 0;
5930 }
5931 
5932 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5933 {
5934 	u32 size = hdev->asic_prop.mmu_pgt_size +
5935 			hdev->asic_prop.mmu_cache_mng_size;
5936 	struct gaudi_device *gaudi = hdev->asic_specific;
5937 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5938 
5939 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5940 		return 0;
5941 
5942 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5943 }
5944 
5945 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5946 {
5947 
5948 }
5949 
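/*
 * Program a DMA core directly (source, destination, size and commit) and poll
 * until the engine is no longer busy. Used by the debugfs DMA read path.
 */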
5950 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5951 					u32 size_to_dma, dma_addr_t dma_addr)
5952 {
5953 	u32 err_cause, val;
5954 	u64 dma_offset;
5955 	int rc;
5956 
5957 	dma_offset = dma_id * DMA_CORE_OFFSET;
5958 
5959 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5960 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5961 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5962 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5963 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5964 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5965 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5966 
5967 	rc = hl_poll_timeout(
5968 		hdev,
5969 		mmDMA0_CORE_STS0 + dma_offset,
5970 		val,
5971 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5972 		0,
5973 		1000000);
5974 
5975 	if (rc) {
		dev_err(hdev->dev,
			"DMA %d timed out during reading of 0x%llx\n",
			dma_id, addr);
5979 		return -EIO;
5980 	}
5981 
5982 	/* Verify DMA is OK */
5983 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5984 	if (err_cause) {
5985 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5986 		dev_dbg(hdev->dev,
5987 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5988 			err_cause);
5989 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5990 
5991 		return -EIO;
5992 	}
5993 
5994 	return 0;
5995 }
5996 
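/*
 * Read an arbitrary device address range into blob_addr for debugfs: pick an
 * idle PCI DMA engine, stop its QMAN, and copy the data in 2MB chunks through
 * a coherent host bounce buffer.
 */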
5997 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5998 				void *blob_addr)
5999 {
6000 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6001 	u32 qm_glbl_sts0, qm_cgm_sts;
6002 	u64 dma_offset, qm_offset;
6003 	dma_addr_t dma_addr;
6004 	void *kernel_addr;
6005 	bool is_eng_idle;
6006 	int rc = 0, dma_id;
6007 
6008 	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
6009 
6010 	if (!kernel_addr)
6011 		return -ENOMEM;
6012 
6013 	hdev->asic_funcs->hw_queues_lock(hdev);
6014 
6015 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6016 	dma_offset = dma_id * DMA_CORE_OFFSET;
6017 	qm_offset = dma_id * DMA_QMAN_OFFSET;
6018 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6019 	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6020 	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6021 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6022 		      IS_DMA_IDLE(dma_core_sts0);
6023 
6024 	if (!is_eng_idle) {
6025 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6026 		dma_offset = dma_id * DMA_CORE_OFFSET;
6027 		qm_offset = dma_id * DMA_QMAN_OFFSET;
6028 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6029 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6030 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6031 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6032 			      IS_DMA_IDLE(dma_core_sts0);
6033 
6034 		if (!is_eng_idle) {
6035 			dev_err_ratelimited(hdev->dev,
6036 				"Can't read via DMA because it is BUSY\n");
6037 			rc = -EAGAIN;
6038 			goto out;
6039 		}
6040 	}
6041 
6042 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6043 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6044 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6045 
6046 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6047 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6048 	 * ASID
6049 	 */
6050 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6051 
6052 	/* Verify DMA is OK */
6053 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6054 	if (err_cause) {
6055 		dev_dbg(hdev->dev,
6056 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
6057 			err_cause);
6058 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6059 	}
6060 
6061 	pos = 0;
6062 	size_left = size;
6063 	size_to_dma = SZ_2M;
6064 
6065 	while (size_left > 0) {
6066 
6067 		if (size_left < SZ_2M)
6068 			size_to_dma = size_left;
6069 
6070 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6071 						dma_addr);
6072 		if (rc)
6073 			break;
6074 
6075 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6076 
6077 		if (size_left <= SZ_2M)
6078 			break;
6079 
6080 		pos += SZ_2M;
6081 		addr += SZ_2M;
6082 		size_left -= SZ_2M;
6083 	}
6084 
6085 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6086 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6087 	 * ASID
6088 	 */
6089 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6090 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6091 
6092 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6093 
6094 out:
6095 	hdev->asic_funcs->hw_queues_unlock(hdev);
6096 
6097 	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6098 
6099 	return rc;
6100 }
6101 
6102 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6103 {
6104 	struct gaudi_device *gaudi = hdev->asic_specific;
6105 
6106 	if (hdev->reset_info.hard_reset_pending)
6107 		return U64_MAX;
6108 
6109 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6110 			(addr - gaudi->hbm_bar_cur_addr));
6111 }
6112 
6113 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6114 {
6115 	struct gaudi_device *gaudi = hdev->asic_specific;
6116 
6117 	if (hdev->reset_info.hard_reset_pending)
6118 		return;
6119 
6120 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6121 			(addr - gaudi->hbm_bar_cur_addr));
6122 }
6123 
6124 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6125 {
6126 	/* mask to zero the MMBP and ASID bits */
6127 	WREG32_AND(reg, ~0x7FF);
6128 	WREG32_OR(reg, asid);
6129 }
6130 
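/*
 * Program the given ASID into the non-secure properties and AxUSER registers
 * of all engines (DMA, TPC, MME, NIC, PSOC trace) so that their transactions
 * pass through the MMU with this ASID.
 */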
6131 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6132 {
6133 	struct gaudi_device *gaudi = hdev->asic_specific;
6134 
6135 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6136 		return;
6137 
6138 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6139 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6140 		return;
6141 	}
6142 
6143 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6144 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6145 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6146 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6147 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6148 
6149 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6150 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6151 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6152 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6153 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6154 
6155 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6156 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6157 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6158 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6159 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6160 
6161 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6162 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6163 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6164 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6165 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6166 
6167 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6168 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6169 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6170 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6171 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6172 
6173 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6174 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6175 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6176 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6177 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6178 
6179 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6180 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6181 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6182 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6183 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6184 
6185 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6186 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6187 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6188 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6189 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6190 
6191 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6192 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6193 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6194 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6195 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6196 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6197 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6198 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6199 
6200 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6201 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6202 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6203 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6204 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6205 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6206 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6207 
6208 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6209 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6210 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6211 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6212 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6213 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6214 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6215 
6216 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6217 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6218 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6219 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6220 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6221 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6222 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6223 
6224 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6225 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6226 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6227 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6228 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6229 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6230 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6231 
6232 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6233 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6234 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6235 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6236 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6237 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6238 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6239 
6240 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6241 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6242 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6243 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6244 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6245 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6246 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6247 
6248 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6249 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6250 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6251 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6252 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6253 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6254 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6255 
6256 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6257 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6258 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6259 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6260 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6261 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6262 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6263 
6264 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6265 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6266 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6267 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6268 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6269 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6270 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6271 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6272 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6273 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6274 
6275 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6276 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6277 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6278 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6279 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6280 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6281 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6282 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6283 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6284 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6285 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6286 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6287 
6288 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6289 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6290 				asid);
6291 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6292 				asid);
6293 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6294 				asid);
6295 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6296 				asid);
6297 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6298 				asid);
6299 	}
6300 
6301 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6302 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6303 				asid);
6304 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6305 				asid);
6306 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6307 				asid);
6308 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6309 				asid);
6310 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6311 				asid);
6312 	}
6313 
6314 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6315 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6316 				asid);
6317 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6318 				asid);
6319 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6320 				asid);
6321 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6322 				asid);
6323 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6324 				asid);
6325 	}
6326 
6327 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6328 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6329 				asid);
6330 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6331 				asid);
6332 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6333 				asid);
6334 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6335 				asid);
6336 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6337 				asid);
6338 	}
6339 
6340 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6341 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6342 				asid);
6343 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6344 				asid);
6345 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6346 				asid);
6347 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6348 				asid);
6349 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6350 				asid);
6351 	}
6352 
6353 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6354 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6355 				asid);
6356 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6357 				asid);
6358 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6359 				asid);
6360 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6361 				asid);
6362 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6363 				asid);
6364 	}
6365 
6366 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6367 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6368 				asid);
6369 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6370 				asid);
6371 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6372 				asid);
6373 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6374 				asid);
6375 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6376 				asid);
6377 	}
6378 
6379 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6380 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6381 				asid);
6382 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6383 				asid);
6384 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6385 				asid);
6386 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6387 				asid);
6388 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6389 				asid);
6390 	}
6391 
6392 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6393 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6394 				asid);
6395 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6396 				asid);
6397 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6398 				asid);
6399 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6400 				asid);
6401 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6402 				asid);
6403 	}
6404 
6405 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6406 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6407 				asid);
6408 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6409 				asid);
6410 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6411 				asid);
6412 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6413 				asid);
6414 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6415 				asid);
6416 	}
6417 
6418 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6419 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6420 }
6421 
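/*
 * Send a kernel job on QMAN0: append a MSG_PROT fence packet at the end of
 * the job's CB, temporarily raise the DMA core protection bit, submit the CB
 * on the DMA 0 queue and poll until the fence value is written back.
 */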
6422 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6423 		struct hl_cs_job *job)
6424 {
6425 	struct packet_msg_prot *fence_pkt;
6426 	u32 *fence_ptr;
6427 	dma_addr_t fence_dma_addr;
6428 	struct hl_cb *cb;
6429 	u32 tmp, timeout, dma_offset;
6430 	int rc;
6431 
6432 	if (hdev->pldm)
6433 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6434 	else
6435 		timeout = HL_DEVICE_TIMEOUT_USEC;
6436 
6437 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6438 	if (!fence_ptr) {
6439 		dev_err(hdev->dev,
6440 			"Failed to allocate fence memory for QMAN0\n");
6441 		return -ENOMEM;
6442 	}
6443 
6444 	cb = job->patched_cb;
6445 
6446 	fence_pkt = cb->kernel_address +
6447 			job->job_cb_size - sizeof(struct packet_msg_prot);
6448 
6449 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6450 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6451 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6452 
6453 	fence_pkt->ctl = cpu_to_le32(tmp);
6454 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6455 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6456 
6457 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6458 
6459 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6460 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6461 
6462 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6463 					job->job_cb_size, cb->bus_address);
6464 	if (rc) {
6465 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6466 		goto free_fence_ptr;
6467 	}
6468 
6469 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6470 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6471 				timeout, true);
6472 
6473 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6474 
6475 	if (rc == -ETIMEDOUT) {
6476 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6477 		goto free_fence_ptr;
6478 	}
6479 
6480 free_fence_ptr:
6481 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6482 
6483 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6484 	return rc;
6485 }
6486 
6487 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6488 {
6489 	if (event_type >= GAUDI_EVENT_SIZE)
6490 		goto event_not_supported;
6491 
6492 	if (!gaudi_irq_map_table[event_type].valid)
6493 		goto event_not_supported;
6494 
6495 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6496 
6497 	return;
6498 
6499 event_not_supported:
6500 	snprintf(desc, size, "N/A");
6501 }
6502 
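/*
 * gaudi_get_razwi_initiator_dma_name - resolve which DMA engine caused a RAZWI
 *
 * The initiator coordinates of a DMA_IF map to two candidate DMA cores, so
 * the ERR_CAUSE register of both candidates is read and the one that reports
 * the matching HBW read/write error is returned. If both (or neither) report
 * the error, both candidate engine ids are returned.
 */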
6503 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6504 							bool is_write, u16 *engine_id_1,
6505 							u16 *engine_id_2)
6506 {
6507 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6508 
6509 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6510 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6511 
6512 	switch (x_y) {
6513 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6514 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6515 		dma_id[0] = 0;
6516 		dma_id[1] = 2;
6517 		break;
6518 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6519 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6520 		dma_id[0] = 1;
6521 		dma_id[1] = 3;
6522 		break;
6523 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6524 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6525 		dma_id[0] = 4;
6526 		dma_id[1] = 6;
6527 		break;
6528 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6529 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6530 		dma_id[0] = 5;
6531 		dma_id[1] = 7;
6532 		break;
6533 	default:
6534 		goto unknown_initiator;
6535 	}
6536 
6537 	for (i = 0 ; i < 2 ; i++) {
6538 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6539 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6540 	}
6541 
6542 	switch (x_y) {
6543 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6544 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6545 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6546 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6547 			return "DMA0";
6548 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6549 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6550 			return "DMA2";
6551 		} else {
6552 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6553 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6554 			return "DMA0 or DMA2";
6555 		}
6556 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6557 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6558 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6559 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6560 			return "DMA1";
6561 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6562 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6563 			return "DMA3";
6564 		} else {
6565 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6566 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6567 			return "DMA1 or DMA3";
6568 		}
6569 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6570 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6571 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6572 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6573 			return "DMA4";
6574 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6575 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6576 			return "DMA6";
6577 		} else {
6578 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6579 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6580 			return "DMA4 or DMA6";
6581 		}
6582 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6583 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6584 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6585 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6586 			return "DMA5";
6587 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6588 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6589 			return "DMA7";
6590 		} else {
6591 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6592 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6593 			return "DMA5 or DMA7";
6594 		}
6595 	}
6596 
6597 unknown_initiator:
6598 	return "unknown initiator";
6599 }
6600 
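/*
 * gaudi_get_razwi_initiator_name - decode the RAZWI initiator ID register
 *
 * Reads the RAZWI WRITE/READ ID register, extracts the initiator's X/Y
 * coordinates and AXI ID and translates them to an engine name. When
 * possible, the matching engine id(s) are returned through engine_id_1/2.
 */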
6601 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6602 							u16 *engine_id_1, u16 *engine_id_2)
6603 {
6604 	u32 val, x_y, axi_id;
6605 
6606 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6607 				RREG32(mmMMU_UP_RAZWI_READ_ID);
6608 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6609 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6610 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6611 			RAZWI_INITIATOR_AXI_ID_SHIFT);
6612 
6613 	switch (x_y) {
6614 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6615 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6616 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6617 			return "TPC0";
6618 		}
6619 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6620 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6621 			return "NIC0";
6622 		}
6623 		break;
6624 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6625 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6626 		return "TPC1";
6627 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6628 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6629 		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6630 		return "MME0";
6631 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6632 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6633 		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6634 		return "MME1";
6635 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6636 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6637 		return "TPC2";
6638 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6639 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6640 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6641 			return "TPC3";
6642 		}
6643 		/* PCI, CPU or PSOC does not have an engine id */
6644 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6645 			return "PCI";
6646 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6647 			return "CPU";
6648 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6649 			return "PSOC";
6650 		break;
6651 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6652 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6653 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6654 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6655 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6656 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6657 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6658 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6659 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6660 				engine_id_1, engine_id_2);
6661 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6662 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6663 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6664 			return "TPC4";
6665 		}
6666 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6667 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6668 			return "NIC1";
6669 		}
6670 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6671 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6672 			return "NIC2";
6673 		}
6674 		break;
6675 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6676 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6677 		return "TPC5";
6678 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6679 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6680 		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6681 		return "MME2";
6682 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6683 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6684 		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6685 		return "MME3";
6686 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6687 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6688 		return "TPC6";
6689 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6690 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6691 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6692 			return "TPC7";
6693 		}
6694 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6695 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6696 			return "NIC4";
6697 		}
6698 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6699 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6700 			return "NIC5";
6701 		}
6702 		break;
6703 	default:
6704 		break;
6705 	}
6706 
6707 	dev_err(hdev->dev,
6708 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6709 		val,
6710 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6711 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6712 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6713 			RAZWI_INITIATOR_AXI_ID_MASK);
6714 
6715 	return "unknown initiator";
6716 }
6717 
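/*
 * gaudi_print_and_get_razwi_info - report pending RAZWI events
 *
 * Checks the MMU RAZWI write/read valid registers, prints the initiator of
 * any pending illegal access, clears the valid bits and reports whether the
 * access was a read, a write or both.
 */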
6718 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6719 						u16 *engine_id_2, bool *is_read, bool *is_write)
6720 {
6721 
6722 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6723 		dev_err_ratelimited(hdev->dev,
6724 			"RAZWI event caused by illegal write of %s\n",
6725 			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6726 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6727 		*is_write = true;
6728 	}
6729 
6730 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6731 		dev_err_ratelimited(hdev->dev,
6732 			"RAZWI event caused by illegal read of %s\n",
6733 			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6734 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6735 		*is_read = true;
6736 	}
6737 }
6738 
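/*
 * gaudi_print_and_get_mmu_error_info - report captured MMU errors
 *
 * If the MMU is initialized, checks the page-error and access-error capture
 * registers, reconstructs the faulting virtual address from the high and low
 * parts, reports it (a page fault is also recorded via hl_handle_page_fault())
 * and clears the capture registers.
 */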
6739 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6740 {
6741 	struct gaudi_device *gaudi = hdev->asic_specific;
6742 	u32 val;
6743 
6744 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6745 		return;
6746 
6747 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6748 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6749 		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6750 		*addr <<= 32;
6751 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6752 
6753 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6754 		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6755 
6756 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6757 	}
6758 
6759 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6760 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6761 		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6762 		*addr <<= 32;
6763 		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6764 
6765 		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6766 
6767 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6768 	}
6769 }
6770 
6771 /*
6772  *  +-------------------+------------------------------------------------------+
6773  *  | Configuration Reg |                     Description                      |
6774  *  |      Address      |                                                      |
6775  *  +-------------------+------------------------------------------------------+
6776  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6777  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6778  *  |                   |0xF34 memory wrappers 63:32                           |
6779  *  |                   |0xF38 memory wrappers 95:64                           |
6780  *  |                   |0xF3C memory wrappers 127:96                          |
6781  *  +-------------------+------------------------------------------------------+
6782  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6783  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6784  *  |                   |0xF44 memory wrappers 63:32                           |
6785  *  |                   |0xF48 memory wrappers 95:64                           |
6786  *  |                   |0xF4C memory wrappers 127:96                          |
6787  *  +-------------------+------------------------------------------------------+
6788  */
6789 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6790 		struct ecc_info_extract_params *params, u64 *ecc_address,
6791 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6792 {
6793 	u32 i, num_mem_regs, reg, err_bit;
6794 	u64 err_addr, err_word = 0;
6795 
6796 	num_mem_regs = params->num_memories / 32 +
6797 			((params->num_memories % 32) ? 1 : 0);
6798 
6799 	if (params->block_address >= CFG_BASE)
6800 		params->block_address -= CFG_BASE;
6801 
6802 	if (params->derr)
6803 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6804 	else
6805 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6806 
6807 	/* Set invalid wrapper index */
6808 	*memory_wrapper_idx = 0xFF;
6809 
6810 	/* Iterate through memory wrappers, a single bit must be set */
6811 	for (i = 0 ; i < num_mem_regs ; i++) {
6812 		err_addr += i * 4;
6813 		err_word = RREG32(err_addr);
6814 		if (err_word) {
6815 			err_bit = __ffs(err_word);
6816 			*memory_wrapper_idx = err_bit + (32 * i);
6817 			break;
6818 		}
6819 	}
6820 
6821 	if (*memory_wrapper_idx == 0xFF) {
6822 		dev_err(hdev->dev, "ECC error information cannot be found\n");
6823 		return -EINVAL;
6824 	}
6825 
6826 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6827 			*memory_wrapper_idx);
6828 
6829 	*ecc_address =
6830 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6831 	*ecc_syndrom =
6832 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6833 
6834 	/* Clear error indication */
6835 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6836 	if (params->derr)
6837 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6838 	else
6839 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6840 
6841 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6842 
6843 	return 0;
6844 }
6845 
6846 /*
6847  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6848  *
6849  * @idx: the current pi/ci value
6850  * @q_len: the queue length (power of 2)
6851  *
6852  * @return the cyclically decremented index
6853  */
6854 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6855 {
6856 	u32 mask = q_len - 1;
6857 
6858 	/*
6859 	 * A modular decrement is equivalent to adding (q_len - 1).
6860 	 * Masking with (q_len - 1) afterwards keeps the result in the
6861 	 * range [0, q_len - 1]
6862 	 */
6863 	return (idx + q_len - 1) & mask;
6864 }
6865 
6866 /**
6867  * gaudi_handle_sw_config_stream_data - print SW config stream data
6868  *
6869  * @hdev: pointer to the habanalabs device structure
6870  * @stream: the QMAN's stream
6871  * @qman_base: base address of QMAN registers block
6872  * @event_mask: mask of the events that occurred
6873  */
6874 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6875 						u64 qman_base, u64 event_mask)
6876 {
6877 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6878 	u32 cq_ptr_lo_off, size;
6879 
6880 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6881 
6882 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6883 						stream * cq_ptr_lo_off;
6884 	cq_ptr_hi = cq_ptr_lo +
6885 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6886 	cq_tsize = cq_ptr_lo +
6887 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6888 
6889 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6890 	size = RREG32(cq_tsize);
6891 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6892 							stream, cq_ptr, size);
6893 
6894 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6895 		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6896 		hdev->captured_err_info.undef_opcode.cq_size = size;
6897 		hdev->captured_err_info.undef_opcode.stream_id = stream;
6898 	}
6899 }
6900 
6901 /**
6902  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6903  *
6904  * @hdev: pointer to the habanalabs device structure
6905  * @qid_base: first QID of the QMAN (out of 4 streams)
6906  * @stream: the QMAN's stream
6907  * @qman_base: base address of QMAN registers block
6908  * @event_mask: mask of the events that occurred
6909  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6910  */
6911 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6912 						u32 stream, u64 qman_base,
6913 						u64 event_mask,
6914 						bool pr_sw_conf)
6915 {
6916 	u32 ci, qm_ci_stream_off, queue_len;
6917 	struct hl_hw_queue *q;
6918 	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6919 	int i;
6920 
6921 	q = &hdev->kernel_queues[qid_base + stream];
6922 
6923 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6924 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6925 						stream * qm_ci_stream_off;
6926 
6927 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6928 					q->int_queue_len : HL_QUEUE_LENGTH;
6929 
6930 	hdev->asic_funcs->hw_queues_lock(hdev);
6931 
6932 	if (pr_sw_conf)
6933 		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6934 
6935 	ci = RREG32(pq_ci);
6936 
6937 	/* we should start printing from ci - 1 */
6938 	ci = gaudi_queue_idx_dec(ci, queue_len);
6939 	memset(addr, 0, sizeof(addr));
6940 
6941 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6942 		struct hl_bd *bd;
6943 		u32 len;
6944 
6945 		bd = q->kernel_address;
6946 		bd += ci;
6947 
6948 		len = le32_to_cpu(bd->len);
6949 		/* len 0 means an uninitialized entry - break */
6950 		if (!len)
6951 			break;
6952 
6953 		addr[i] = le64_to_cpu(bd->ptr);
6954 
6955 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6956 							stream, ci, addr[i], len);
6957 
6958 		/* get previous ci, wrap if needed */
6959 		ci = gaudi_queue_idx_dec(ci, queue_len);
6960 	}
6961 
6962 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6963 		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6964 		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6965 
6966 		if (arr_idx == 0) {
6967 			undef_opcode->timestamp = ktime_get();
6968 			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6969 		}
6970 
6971 		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6972 		undef_opcode->cb_addr_streams_len++;
6973 	}
6974 
6975 	hdev->asic_funcs->hw_queues_unlock(hdev);
6976 }
6977 
6978 /**
6979  * handle_qman_data_on_err - extract QMAN data on error
6980  *
6981  * @hdev: pointer to the habanalabs device structure
6982  * @qid_base: first QID of the QMAN (out of 4 streams)
6983  * @stream: the QMAN's stream
6984  * @qman_base: base address of QMAN registers block
6985  * @event_mask: mask of the events that occurred
6986  *
6987  * This function attempts to extract as much data as possible on a QMAN error.
6988  * On an upper CP, print the SW config stream data and the last 8 PQEs.
6989  * On the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
6990  */
6991 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6992 				   u32 stream, u64 qman_base, u64 event_mask)
6993 {
6994 	u32 i;
6995 
6996 	if (stream != QMAN_STREAMS) {
6997 		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6998 			qman_base, event_mask, true);
6999 		return;
7000 	}
7001 
7002 	/* handle Lower-CP */
7003 	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
7004 
7005 	for (i = 0; i < QMAN_STREAMS; i++)
7006 		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
7007 			qman_base, event_mask, false);
7008 }
7009 
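/*
 * gaudi_handle_qman_err_generic - decode and report QMAN errors
 *
 * Scans the GLBL_STS1 register of every stream plus the lower CP, prints the
 * active error causes, reports undefined-opcode errors through the event
 * mask, clears the errors (or extracts debug data when stop_on_err is set)
 * and finally reports any pending arbiter errors.
 */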
7010 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7011 					  const char *qm_name,
7012 					  u64 qman_base,
7013 					  u32 qid_base,
7014 					  u64 *event_mask)
7015 {
7016 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7017 	u64 glbl_sts_addr, arb_err_addr;
7018 	char reg_desc[32];
7019 
7020 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7021 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7022 
7023 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
7024 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7025 		glbl_sts_clr_val = 0;
7026 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7027 
7028 		if (!glbl_sts_val)
7029 			continue;
7030 
7031 		if (i == QMAN_STREAMS)
7032 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7033 		else
7034 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7035 
7036 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7037 			if (glbl_sts_val & BIT(j)) {
7038 				dev_err_ratelimited(hdev->dev,
7039 						"%s %s. err cause: %s\n",
7040 						qm_name, reg_desc,
7041 						gaudi_qman_error_cause[j]);
7042 				glbl_sts_clr_val |= BIT(j);
7043 			}
7044 		}
7045 		/* check for undefined opcode */
7046 		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
7047 				hdev->captured_err_info.undef_opcode.write_enable) {
7048 			memset(&hdev->captured_err_info.undef_opcode, 0,
7049 						sizeof(hdev->captured_err_info.undef_opcode));
7050 
7051 			hdev->captured_err_info.undef_opcode.write_enable = false;
7052 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7053 		}
7054 
7055 		/* Write 1 to clear errors */
7056 		if (!hdev->stop_on_err)
7057 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7058 		else
7059 			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
7060 	}
7061 
7062 	arb_err_val = RREG32(arb_err_addr);
7063 
7064 	if (!arb_err_val)
7065 		return;
7066 
7067 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7068 		if (arb_err_val & BIT(j)) {
7069 			dev_err_ratelimited(hdev->dev,
7070 					"%s ARB_ERR. err cause: %s\n",
7071 					qm_name,
7072 					gaudi_qman_arb_error_cause[j]);
7073 		}
7074 	}
7075 }
7076 
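/*
 * gaudi_print_sm_sei_info - print a sync manager SEI error
 *
 * Translates the DMA_IF SEI event index to the corresponding sync manager
 * name and prints the SEI cause together with the log value reported by the
 * firmware.
 */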
7077 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7078 		struct hl_eq_sm_sei_data *sei_data)
7079 {
7080 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7081 
7082 	/* Flip the bits as the enum is ordered in the opposite way */
7083 	index = (index ^ 0x3) & 0x3;
7084 
7085 	switch (sei_data->sei_cause) {
7086 	case SM_SEI_SO_OVERFLOW:
7087 		dev_err_ratelimited(hdev->dev,
7088 			"%s SEI Error: SOB Group %u overflow/underflow",
7089 			gaudi_sync_manager_names[index],
7090 			le32_to_cpu(sei_data->sei_log));
7091 		break;
7092 	case SM_SEI_LBW_4B_UNALIGNED:
7093 		dev_err_ratelimited(hdev->dev,
7094 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7095 			gaudi_sync_manager_names[index],
7096 			le32_to_cpu(sei_data->sei_log));
7097 		break;
7098 	case SM_SEI_AXI_RESPONSE_ERR:
7099 		dev_err_ratelimited(hdev->dev,
7100 			"%s SEI Error: AXI ID %u response error",
7101 			gaudi_sync_manager_names[index],
7102 			le32_to_cpu(sei_data->sei_log));
7103 		break;
7104 	default:
7105 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7106 				le32_to_cpu(sei_data->sei_log));
7107 		break;
7108 	}
7109 }
7110 
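/*
 * gaudi_handle_ecc_event - report an ECC error
 *
 * Depending on the event type and whether FW security is enabled, the ECC
 * address, syndrome and memory wrapper index are either taken from the FW
 * event data or extracted directly from the relevant block's registers.
 */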
7111 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7112 		struct hl_eq_ecc_data *ecc_data)
7113 {
7114 	struct ecc_info_extract_params params;
7115 	u64 ecc_address = 0, ecc_syndrom = 0;
7116 	u8 index, memory_wrapper_idx = 0;
7117 	bool extract_info_from_fw;
7118 	int rc;
7119 
7120 	if (hdev->asic_prop.fw_security_enabled) {
7121 		extract_info_from_fw = true;
7122 		goto extract_ecc_info;
7123 	}
7124 
7125 	switch (event_type) {
7126 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7127 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7128 		extract_info_from_fw = true;
7129 		break;
7130 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7131 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7132 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7133 		params.num_memories = 90;
7134 		params.derr = false;
7135 		extract_info_from_fw = false;
7136 		break;
7137 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7138 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7139 		params.block_address =
7140 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7141 		params.num_memories = 90;
7142 		params.derr = true;
7143 		extract_info_from_fw = false;
7144 		break;
7145 	case GAUDI_EVENT_MME0_ACC_SERR:
7146 	case GAUDI_EVENT_MME1_ACC_SERR:
7147 	case GAUDI_EVENT_MME2_ACC_SERR:
7148 	case GAUDI_EVENT_MME3_ACC_SERR:
7149 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7150 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7151 		params.num_memories = 128;
7152 		params.derr = false;
7153 		extract_info_from_fw = false;
7154 		break;
7155 	case GAUDI_EVENT_MME0_ACC_DERR:
7156 	case GAUDI_EVENT_MME1_ACC_DERR:
7157 	case GAUDI_EVENT_MME2_ACC_DERR:
7158 	case GAUDI_EVENT_MME3_ACC_DERR:
7159 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7160 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7161 		params.num_memories = 128;
7162 		params.derr = true;
7163 		extract_info_from_fw = false;
7164 		break;
7165 	case GAUDI_EVENT_MME0_SBAB_SERR:
7166 	case GAUDI_EVENT_MME1_SBAB_SERR:
7167 	case GAUDI_EVENT_MME2_SBAB_SERR:
7168 	case GAUDI_EVENT_MME3_SBAB_SERR:
7169 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7170 		params.block_address =
7171 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7172 		params.num_memories = 33;
7173 		params.derr = false;
7174 		extract_info_from_fw = false;
7175 		break;
7176 	case GAUDI_EVENT_MME0_SBAB_DERR:
7177 	case GAUDI_EVENT_MME1_SBAB_DERR:
7178 	case GAUDI_EVENT_MME2_SBAB_DERR:
7179 	case GAUDI_EVENT_MME3_SBAB_DERR:
7180 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7181 		params.block_address =
7182 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7183 		params.num_memories = 33;
7184 		params.derr = true;
7185 		extract_info_from_fw = false;
7186 		break;
7187 	default:
7188 		return;
7189 	}
7190 
7191 extract_ecc_info:
7192 	if (extract_info_from_fw) {
7193 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7194 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7195 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7196 	} else {
7197 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7198 				&ecc_syndrom, &memory_wrapper_idx);
7199 		if (rc)
7200 			return;
7201 	}
7202 
7203 	dev_err(hdev->dev,
7204 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7205 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7206 }
7207 
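/*
 * gaudi_handle_qman_err - handle a QMAN error event
 *
 * Translates the event type to the QMAN's register base and first queue id
 * and delegates the actual error decoding to gaudi_handle_qman_err_generic().
 */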
7208 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7209 {
7210 	u64 qman_base;
7211 	char desc[32];
7212 	u32 qid_base;
7213 	u8 index;
7214 
7215 	switch (event_type) {
7216 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7217 		index = event_type - GAUDI_EVENT_TPC0_QM;
7218 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7219 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7220 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7221 		break;
7222 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7223 		if (event_type == GAUDI_EVENT_MME0_QM) {
7224 			index = 0;
7225 			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7226 		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7227 			index = 2;
7228 			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7229 		}
7230 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7231 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7232 		break;
7233 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7234 		index = event_type - GAUDI_EVENT_DMA0_QM;
7235 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7236 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7237 		if (index > 1)
7238 			qid_base++;
7239 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7240 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7241 		break;
7242 	case GAUDI_EVENT_NIC0_QM0:
7243 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7244 		qman_base = mmNIC0_QM0_BASE;
7245 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7246 		break;
7247 	case GAUDI_EVENT_NIC0_QM1:
7248 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7249 		qman_base = mmNIC0_QM1_BASE;
7250 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7251 		break;
7252 	case GAUDI_EVENT_NIC1_QM0:
7253 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7254 		qman_base = mmNIC1_QM0_BASE;
7255 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7256 		break;
7257 	case GAUDI_EVENT_NIC1_QM1:
7258 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7259 		qman_base = mmNIC1_QM1_BASE;
7260 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7261 		break;
7262 	case GAUDI_EVENT_NIC2_QM0:
7263 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7264 		qman_base = mmNIC2_QM0_BASE;
7265 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7266 		break;
7267 	case GAUDI_EVENT_NIC2_QM1:
7268 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7269 		qman_base = mmNIC2_QM1_BASE;
7270 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7271 		break;
7272 	case GAUDI_EVENT_NIC3_QM0:
7273 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7274 		qman_base = mmNIC3_QM0_BASE;
7275 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7276 		break;
7277 	case GAUDI_EVENT_NIC3_QM1:
7278 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7279 		qman_base = mmNIC3_QM1_BASE;
7280 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7281 		break;
7282 	case GAUDI_EVENT_NIC4_QM0:
7283 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7284 		qman_base = mmNIC4_QM0_BASE;
7285 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7286 		break;
7287 	case GAUDI_EVENT_NIC4_QM1:
7288 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7289 		qman_base = mmNIC4_QM1_BASE;
7290 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7291 		break;
7292 	default:
7293 		return;
7294 	}
7295 
7296 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7297 }
7298 
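/*
 * gaudi_print_irq_info - print H/W interrupt information
 *
 * Prints the event description and, when the event may involve a RAZWI,
 * collects and reports the RAZWI and MMU error information, including the
 * initiator engine id(s) and the faulting address.
 */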
7299 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7300 					bool razwi, u64 *event_mask)
7301 {
7302 	bool is_read = false, is_write = false;
7303 	u16 engine_id[2], num_of_razwi_eng = 0;
7304 	char desc[64] = "";
7305 	u64 razwi_addr = 0;
7306 	u8 razwi_flags = 0;
7307 
7308 	/*
7309 	 * Init the engine ids as not valid by default; they get a valid value only if the
7310 	 * razwi was initiated by an engine that has an engine id.
7311 	 */
7312 	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7313 	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7314 
7315 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7316 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7317 		event_type, desc);
7318 
7319 	if (razwi) {
7320 		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7321 						&is_write);
7322 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7323 
7324 		if (is_read)
7325 			razwi_flags |= HL_RAZWI_READ;
7326 		if (is_write)
7327 			razwi_flags |= HL_RAZWI_WRITE;
7328 
7329 		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7330 			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7331 				num_of_razwi_eng = 2;
7332 			else
7333 				num_of_razwi_eng = 1;
7334 		}
7335 
7336 		hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags,
7337 				event_mask);
7338 	}
7339 }
7340 
7341 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7342 					struct cpucp_pkt_sync_err *sync_err)
7343 {
7344 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7345 
7346 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7347 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7348 }
7349 
7350 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7351 					struct hl_eq_fw_alive *fw_alive)
7352 {
7353 	dev_err(hdev->dev,
7354 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7355 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7356 		le32_to_cpu(fw_alive->process_id),
7357 		le32_to_cpu(fw_alive->thread_id),
7358 		le64_to_cpu(fw_alive->uptime_seconds));
7359 }
7360 
7361 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7362 						void *data)
7363 {
7364 	char desc[64] = "", *type;
7365 	struct eq_nic_sei_event *eq_nic_sei = data;
7366 	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7367 
7368 	switch (eq_nic_sei->axi_error_cause) {
7369 	case RXB:
7370 		type = "RXB";
7371 		break;
7372 	case RXE:
7373 		type = "RXE";
7374 		break;
7375 	case TXS:
7376 		type = "TXS";
7377 		break;
7378 	case TXE:
7379 		type = "TXE";
7380 		break;
7381 	case QPC_RESP:
7382 		type = "QPC_RESP";
7383 		break;
7384 	case NON_AXI_ERR:
7385 		type = "NON_AXI_ERR";
7386 		break;
7387 	case TMR:
7388 		type = "TMR";
7389 		break;
7390 	default:
7391 		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7392 			eq_nic_sei->axi_error_cause);
7393 		type = "N/A";
7394 		break;
7395 	}
7396 
7397 	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7398 			eq_nic_sei->id);
7399 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7400 		event_type, desc);
7401 }
7402 
7403 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7404 {
7405 	/* GAUDI doesn't support any reset except hard-reset */
7406 	return -EPERM;
7407 }
7408 
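/*
 * gaudi_hbm_read_interrupts - read and report HBM ECC/parity interrupts
 *
 * If the FW reports HBM ECC data, print it as-is. Otherwise, when register
 * access is allowed, read the per-channel interrupt and ECC info registers of
 * the given HBM device, print any errors, clear the interrupts and check the
 * MC SRAM SERR/DERR indications.
 */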
7409 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7410 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7411 {
7412 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7413 	int rc = 0;
7414 
7415 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7416 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7417 		if (!hbm_ecc_data) {
7418 			dev_err(hdev->dev, "No FW ECC data");
7419 			return 0;
7420 		}
7421 
7422 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7423 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7424 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7425 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7426 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7427 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7428 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7429 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7430 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7431 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7432 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7433 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7434 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7435 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7436 
7437 		dev_err(hdev->dev,
7438 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7439 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7440 		dev_err(hdev->dev,
7441 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7442 			device, ch, hbm_ecc_data->first_addr, type,
7443 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7444 			hbm_ecc_data->dec_cnt);
7445 		return 0;
7446 	}
7447 
7448 	if (hdev->asic_prop.fw_security_enabled) {
7449 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7450 		return 0;
7451 	}
7452 
7453 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7454 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7455 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7456 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7457 		if (val) {
7458 			rc = -EIO;
7459 			dev_err(hdev->dev,
7460 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7461 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7462 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7463 				(val >> 4) & 0x1);
7464 
7465 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7466 			dev_err(hdev->dev,
7467 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7468 				device, ch * 2,
7469 				RREG32(base + ch * 0x1000 + 0x064),
7470 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7471 				(val2 & 0xFF0000) >> 16,
7472 				(val2 & 0xFF000000) >> 24);
7473 		}
7474 
7475 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7476 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7477 		if (val) {
7478 			rc = -EIO;
7479 			dev_err(hdev->dev,
7480 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7481 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7482 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7483 				(val >> 4) & 0x1);
7484 
7485 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7486 			dev_err(hdev->dev,
7487 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7488 				device, ch * 2 + 1,
7489 				RREG32(base + ch * 0x1000 + 0x074),
7490 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7491 				(val2 & 0xFF0000) >> 16,
7492 				(val2 & 0xFF000000) >> 24);
7493 		}
7494 
7495 		/* Clear interrupts */
7496 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7497 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7498 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7499 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7500 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7501 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7502 	}
7503 
7504 	val  = RREG32(base + 0x8F30);
7505 	val2 = RREG32(base + 0x8F34);
7506 	if (val | val2) {
7507 		rc = -EIO;
7508 		dev_err(hdev->dev,
7509 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7510 			device, val, val2);
7511 	}
7512 	val  = RREG32(base + 0x8F40);
7513 	val2 = RREG32(base + 0x8F44);
7514 	if (val | val2) {
7515 		rc = -EIO;
7516 		dev_err(hdev->dev,
7517 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7518 			device, val, val2);
7519 	}
7520 
7521 	return rc;
7522 }
7523 
7524 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7525 {
7526 	switch (hbm_event_type) {
7527 	case GAUDI_EVENT_HBM0_SPI_0:
7528 	case GAUDI_EVENT_HBM0_SPI_1:
7529 		return 0;
7530 	case GAUDI_EVENT_HBM1_SPI_0:
7531 	case GAUDI_EVENT_HBM1_SPI_1:
7532 		return 1;
7533 	case GAUDI_EVENT_HBM2_SPI_0:
7534 	case GAUDI_EVENT_HBM2_SPI_1:
7535 		return 2;
7536 	case GAUDI_EVENT_HBM3_SPI_0:
7537 	case GAUDI_EVENT_HBM3_SPI_1:
7538 		return 3;
7539 	default:
7540 		break;
7541 	}
7542 
7543 	/* Should never happen */
7544 	return 0;
7545 }
7546 
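/*
 * gaudi_tpc_read_interrupts - read and clear a TPC's interrupt causes
 *
 * Prints every active cause in the TPC interrupt cause register and returns
 * true if one of them (a QM error) requires a soft reset. The interrupt
 * cause register is cleared before returning.
 */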
7547 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7548 					char *interrupt_name)
7549 {
7550 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7551 	bool soft_reset_required = false;
7552 
7553 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7554 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7555 
7556 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7557 		if (tpc_interrupts_cause & BIT(i)) {
7558 			dev_err_ratelimited(hdev->dev,
7559 					"TPC%d_%s interrupt cause: %s\n",
7560 					tpc_id, interrupt_name,
7561 					gaudi_tpc_interrupts_cause[i]);
7562 			/* If this is a QM error, we need to soft-reset */
7563 			if (i == 15)
7564 				soft_reset_required = true;
7565 		}
7566 
7567 	/* Clear interrupts */
7568 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7569 
7570 	return soft_reset_required;
7571 }
7572 
7573 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7574 {
7575 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7576 }
7577 
7578 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7579 {
7580 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7581 }
7582 
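/*
 * gaudi_print_clk_change_info - handle a clock throttling event
 *
 * Updates the clock throttling reasons and timestamps under the throttling
 * lock and prints whether throttling started or stopped due to power or
 * thermal constraints.
 */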
7583 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7584 {
7585 	ktime_t zero_time = ktime_set(0, 0);
7586 
7587 	mutex_lock(&hdev->clk_throttling.lock);
7588 
7589 	switch (event_type) {
7590 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7591 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7592 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7593 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7594 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7595 		dev_info_ratelimited(hdev->dev,
7596 			"Clock throttling due to power consumption\n");
7597 		break;
7598 
7599 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7600 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7601 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7602 		dev_info_ratelimited(hdev->dev,
7603 			"Power envelope is safe, back to optimal clock\n");
7604 		break;
7605 
7606 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7607 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7608 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7609 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7610 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7611 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7612 		dev_info_ratelimited(hdev->dev,
7613 			"Clock throttling due to overheating\n");
7614 		break;
7615 
7616 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7617 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7618 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7619 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7620 		dev_info_ratelimited(hdev->dev,
7621 			"Thermal envelope is safe, back to optimal clock\n");
7622 		break;
7623 
7624 	default:
7625 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7626 			event_type);
7627 		break;
7628 	}
7629 
7630 	mutex_unlock(&hdev->clk_throttling.lock);
7631 }
7632 
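/*
 * gaudi_handle_eqe - main handler for firmware event queue entries
 *
 * Updates the event statistics, dispatches the event to the relevant handler
 * and decides whether to unmask the interrupt, notify user-space or escalate
 * to a (hard) device reset.
 */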
7633 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7634 {
7635 	struct gaudi_device *gaudi = hdev->asic_specific;
7636 	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7637 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7638 	u32 fw_fatal_err_flag = 0, flags = 0;
7639 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7640 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7641 	bool reset_required, reset_direct = false;
7642 	u8 cause;
7643 	int rc;
7644 
7645 	if (event_type >= GAUDI_EVENT_SIZE) {
7646 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7647 				event_type, GAUDI_EVENT_SIZE - 1);
7648 		return;
7649 	}
7650 
7651 	gaudi->events_stat[event_type]++;
7652 	gaudi->events_stat_aggregate[event_type]++;
7653 
7654 	switch (event_type) {
7655 	case GAUDI_EVENT_PCIE_CORE_DERR:
7656 	case GAUDI_EVENT_PCIE_IF_DERR:
7657 	case GAUDI_EVENT_PCIE_PHY_DERR:
7658 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7659 	case GAUDI_EVENT_MME0_ACC_DERR:
7660 	case GAUDI_EVENT_MME0_SBAB_DERR:
7661 	case GAUDI_EVENT_MME1_ACC_DERR:
7662 	case GAUDI_EVENT_MME1_SBAB_DERR:
7663 	case GAUDI_EVENT_MME2_ACC_DERR:
7664 	case GAUDI_EVENT_MME2_SBAB_DERR:
7665 	case GAUDI_EVENT_MME3_ACC_DERR:
7666 	case GAUDI_EVENT_MME3_SBAB_DERR:
7667 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7668 		fallthrough;
7669 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7670 	case GAUDI_EVENT_PSOC_MEM_DERR:
7671 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7672 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7673 	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7674 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7675 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7676 	case GAUDI_EVENT_MMU_DERR:
7677 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7678 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7679 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7680 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7681 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7682 		goto reset_device;
7683 
7684 	case GAUDI_EVENT_GIC500:
7685 	case GAUDI_EVENT_AXI_ECC:
7686 	case GAUDI_EVENT_L2_RAM_ECC:
7687 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7688 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7689 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7690 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7691 		goto reset_device;
7692 
7693 	case GAUDI_EVENT_HBM0_SPI_0:
7694 	case GAUDI_EVENT_HBM1_SPI_0:
7695 	case GAUDI_EVENT_HBM2_SPI_0:
7696 	case GAUDI_EVENT_HBM3_SPI_0:
7697 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7698 		gaudi_hbm_read_interrupts(hdev,
7699 				gaudi_hbm_event_to_dev(event_type),
7700 				&eq_entry->hbm_ecc_data);
7701 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7702 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7703 		goto reset_device;
7704 
7705 	case GAUDI_EVENT_HBM0_SPI_1:
7706 	case GAUDI_EVENT_HBM1_SPI_1:
7707 	case GAUDI_EVENT_HBM2_SPI_1:
7708 	case GAUDI_EVENT_HBM3_SPI_1:
7709 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7710 		gaudi_hbm_read_interrupts(hdev,
7711 				gaudi_hbm_event_to_dev(event_type),
7712 				&eq_entry->hbm_ecc_data);
7713 		hl_fw_unmask_irq(hdev, event_type);
7714 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7715 		break;
7716 
7717 	case GAUDI_EVENT_TPC0_DEC:
7718 	case GAUDI_EVENT_TPC1_DEC:
7719 	case GAUDI_EVENT_TPC2_DEC:
7720 	case GAUDI_EVENT_TPC3_DEC:
7721 	case GAUDI_EVENT_TPC4_DEC:
7722 	case GAUDI_EVENT_TPC5_DEC:
7723 	case GAUDI_EVENT_TPC6_DEC:
7724 	case GAUDI_EVENT_TPC7_DEC:
7725 		/* On a TPC DEC event, notify on a TPC assertion. While there isn't
7726 		 * a specific event for an assertion yet, the FW generates a TPC DEC event.
7727 		 * The SW upper layer inspects an internal mapped area to determine
7728 		 * whether the event is a TPC assertion or a "real" TPC DEC.
7729 		 */
7730 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7731 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7732 		reset_required = gaudi_tpc_read_interrupts(hdev,
7733 					tpc_dec_event_to_tpc_id(event_type),
7734 					"AXI_SLV_DEC_Error");
7735 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7736 		if (reset_required) {
7737 			dev_err(hdev->dev, "reset required due to %s\n",
7738 				gaudi_irq_map_table[event_type].name);
7739 
7740 			reset_direct = true;
7741 			goto reset_device;
7742 		} else {
7743 			hl_fw_unmask_irq(hdev, event_type);
7744 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7745 		}
7746 		break;
7747 
7748 	case GAUDI_EVENT_TPC0_KRN_ERR:
7749 	case GAUDI_EVENT_TPC1_KRN_ERR:
7750 	case GAUDI_EVENT_TPC2_KRN_ERR:
7751 	case GAUDI_EVENT_TPC3_KRN_ERR:
7752 	case GAUDI_EVENT_TPC4_KRN_ERR:
7753 	case GAUDI_EVENT_TPC5_KRN_ERR:
7754 	case GAUDI_EVENT_TPC6_KRN_ERR:
7755 	case GAUDI_EVENT_TPC7_KRN_ERR:
7756 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7757 		reset_required = gaudi_tpc_read_interrupts(hdev,
7758 					tpc_krn_event_to_tpc_id(event_type),
7759 					"KRN_ERR");
7760 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7761 		if (reset_required) {
7762 			dev_err(hdev->dev, "reset required due to %s\n",
7763 				gaudi_irq_map_table[event_type].name);
7764 
7765 			reset_direct = true;
7766 			goto reset_device;
7767 		} else {
7768 			hl_fw_unmask_irq(hdev, event_type);
7769 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7770 		}
7771 		break;
7772 
7773 	case GAUDI_EVENT_PCIE_CORE_SERR:
7774 	case GAUDI_EVENT_PCIE_IF_SERR:
7775 	case GAUDI_EVENT_PCIE_PHY_SERR:
7776 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7777 	case GAUDI_EVENT_MME0_ACC_SERR:
7778 	case GAUDI_EVENT_MME0_SBAB_SERR:
7779 	case GAUDI_EVENT_MME1_ACC_SERR:
7780 	case GAUDI_EVENT_MME1_SBAB_SERR:
7781 	case GAUDI_EVENT_MME2_ACC_SERR:
7782 	case GAUDI_EVENT_MME2_SBAB_SERR:
7783 	case GAUDI_EVENT_MME3_ACC_SERR:
7784 	case GAUDI_EVENT_MME3_SBAB_SERR:
7785 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7786 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7787 	case GAUDI_EVENT_PSOC_MEM_SERR:
7788 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7789 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7790 	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7791 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7792 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7793 		fallthrough;
7794 	case GAUDI_EVENT_MMU_SERR:
7795 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7796 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7797 		hl_fw_unmask_irq(hdev, event_type);
7798 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7799 		break;
7800 
7801 	case GAUDI_EVENT_PCIE_DEC:
7802 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7803 	case GAUDI_EVENT_PSOC_AXI_DEC:
7804 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7805 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7806 		hl_fw_unmask_irq(hdev, event_type);
7807 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7808 		break;
7809 
7810 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7811 	case GAUDI_EVENT_MMU_WR_PERM:
7812 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7813 		hl_fw_unmask_irq(hdev, event_type);
7814 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7815 		break;
7816 
7817 	case GAUDI_EVENT_MME0_WBC_RSP:
7818 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7819 	case GAUDI_EVENT_MME1_WBC_RSP:
7820 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7821 	case GAUDI_EVENT_MME2_WBC_RSP:
7822 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7823 	case GAUDI_EVENT_MME3_WBC_RSP:
7824 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7825 	case GAUDI_EVENT_RAZWI_OR_ADC:
7826 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7827 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7828 		fallthrough;
7829 	case GAUDI_EVENT_NIC0_QM0:
7830 	case GAUDI_EVENT_NIC0_QM1:
7831 	case GAUDI_EVENT_NIC1_QM0:
7832 	case GAUDI_EVENT_NIC1_QM1:
7833 	case GAUDI_EVENT_NIC2_QM0:
7834 	case GAUDI_EVENT_NIC2_QM1:
7835 	case GAUDI_EVENT_NIC3_QM0:
7836 	case GAUDI_EVENT_NIC3_QM1:
7837 	case GAUDI_EVENT_NIC4_QM0:
7838 	case GAUDI_EVENT_NIC4_QM1:
7839 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7840 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7841 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7842 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7843 		hl_fw_unmask_irq(hdev, event_type);
7844 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7845 		break;
7846 
7847 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7848 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7849 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7850 		goto reset_device;
7851 
7852 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7853 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7854 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7855 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7856 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7857 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7858 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7859 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7860 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7861 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7862 		hl_fw_unmask_irq(hdev, event_type);
7863 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7864 		break;
7865 
7866 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7867 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7868 		hl_fw_unmask_irq(hdev, event_type);
7869 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7870 		break;
7871 
7872 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7873 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7874 		gaudi_print_sm_sei_info(hdev, event_type,
7875 					&eq_entry->sm_sei_data);
7876 		rc = hl_state_dump(hdev);
7877 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7878 		if (rc)
7879 			dev_err(hdev->dev,
7880 				"Error during system state dump %d\n", rc);
7881 		hl_fw_unmask_irq(hdev, event_type);
7882 		break;
7883 
7884 	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7885 		break;
7886 
7887 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7888 		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7889 		hl_fw_unmask_irq(hdev, event_type);
7890 		break;
7891 
7892 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7893 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7894 		dev_err(hdev->dev,
7895 			"Received high temp H/W interrupt %d (cause %d)\n",
7896 			event_type, cause);
7897 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7898 		break;
7899 
7900 	case GAUDI_EVENT_DEV_RESET_REQ:
7901 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7902 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7903 		goto reset_device;
7904 
7905 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7906 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7907 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7908 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7909 		goto reset_device;
7910 
7911 	case GAUDI_EVENT_FW_ALIVE_S:
7912 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7913 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7914 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7915 		goto reset_device;
7916 
7917 	default:
7918 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7919 				event_type);
7920 		break;
7921 	}
7922 
7923 	if (event_mask)
7924 		hl_notifier_event_send_all(hdev, event_mask);
7925 
7926 	return;
7927 
7928 reset_device:
7929 	reset_required = true;
7930 
7931 	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7932 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7933 
7934 		/* notify that the device is unavailable while the reset is triggered by FW */
7935 		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7936 					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7937 	} else if (hdev->hard_reset_on_fw_events) {
7938 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7939 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7940 	} else {
7941 		reset_required = false;
7942 	}
7943 
7944 	if (reset_required) {
7945 		hl_device_cond_reset(hdev, flags, event_mask);
7946 	} else {
7947 		hl_fw_unmask_irq(hdev, event_type);
7948 		/* A notification on the event that occurred must be sent even though reset is not executed */
7949 		if (event_mask)
7950 			hl_notifier_event_send_all(hdev, event_mask);
7951 	}
7952 }
7953 
7954 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7955 {
7956 	struct gaudi_device *gaudi = hdev->asic_specific;
7957 
7958 	if (aggregate) {
7959 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7960 		return gaudi->events_stat_aggregate;
7961 	}
7962 
7963 	*size = (u32) sizeof(gaudi->events_stat);
7964 	return gaudi->events_stat;
7965 }
7966 
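/*
 * gaudi_mmu_invalidate_cache - invalidate the MMU STLB cache
 *
 * Triggers a full L0/L1 cache invalidation through the STLB registers and
 * polls until the invalidation completes or times out. Skipped if the MMU is
 * not initialized or a hard reset is pending.
 */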
7967 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7968 {
7969 	struct gaudi_device *gaudi = hdev->asic_specific;
7970 	u32 status, timeout_usec;
7971 	int rc;
7972 
7973 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7974 		hdev->reset_info.hard_reset_pending)
7975 		return 0;
7976 
7977 	if (hdev->pldm)
7978 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7979 	else
7980 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7981 
7982 	/* L0 & L1 invalidation */
7983 	WREG32(mmSTLB_INV_PS, 3);
7984 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7985 	WREG32(mmSTLB_INV_PS, 2);
7986 
7987 	rc = hl_poll_timeout(
7988 		hdev,
7989 		mmSTLB_INV_PS,
7990 		status,
7991 		!status,
7992 		1000,
7993 		timeout_usec);
7994 
7995 	WREG32(mmSTLB_INV_SET, 0);
7996 
7997 	return rc;
7998 }
7999 
8000 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8001 						bool is_hard, u32 flags,
8002 						u32 asid, u64 va, u64 size)
8003 {
8004 	/* Treat as invalidate all because there is no range invalidation
8005 	 * in Gaudi
8006 	 */
8007 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8008 }
8009 
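/*
 * gaudi_mmu_update_asid_hop0_addr - program the hop0 table address of an ASID
 *
 * Writes the physical address of the hop0 page table for the given ASID to
 * the MMU registers and polls the MMU busy bit until the configuration is
 * accepted or a timeout occurs.
 */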
8010 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
8011 {
8012 	u32 status, timeout_usec;
8013 	int rc;
8014 
8015 	if (hdev->pldm)
8016 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8017 	else
8018 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8019 
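	/*
	 * Program the hop0 page-table address for this ASID: select the ASID,
	 * split the physical address across the PA43_12/PA49_44 registers,
	 * kick the MMU via the busy bit and poll until the HW clears it.
	 */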
8020 	WREG32(MMU_ASID, asid);
8021 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8022 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8023 	WREG32(MMU_BUSY, 0x80000000);
8024 
8025 	rc = hl_poll_timeout(
8026 		hdev,
8027 		MMU_BUSY,
8028 		status,
8029 		!(status & 0x80000000),
8030 		1000,
8031 		timeout_usec);
8032 
8033 	if (rc) {
8034 		dev_err(hdev->dev,
8035 			"Timeout during MMU hop0 config of asid %d\n", asid);
8036 		return rc;
8037 	}
8038 
8039 	return 0;
8040 }
8041 
8042 static int gaudi_send_heartbeat(struct hl_device *hdev)
8043 {
8044 	struct gaudi_device *gaudi = hdev->asic_specific;
8045 
8046 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8047 		return 0;
8048 
8049 	return hl_fw_send_heartbeat(hdev);
8050 }
8051 
8052 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8053 {
8054 	struct gaudi_device *gaudi = hdev->asic_specific;
8055 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8056 	int rc;
8057 
8058 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8059 		return 0;
8060 
8061 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8062 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8063 					mmCPU_BOOT_ERR1);
8064 	if (rc)
8065 		return rc;
8066 
8067 	if (!strlen(prop->cpucp_info.card_name))
8068 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8069 				CARD_NAME_MAX_LEN);
8070 
8071 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8072 
8073 	set_default_power_values(hdev);
8074 
8075 	return 0;
8076 }
8077 
8078 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8079 		struct engines_data *e)
8080 {
8081 	struct gaudi_device *gaudi = hdev->asic_specific;
8082 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8083 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8084 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8085 	unsigned long *mask = (unsigned long *)mask_arr;
8086 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8087 	bool is_idle = true, is_eng_idle, is_slave;
8088 	u64 offset;
8089 	int i, dma_id, port;
8090 
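	/*
	 * Check each engine class (DMA, TPC, MME, NIC) in turn: an engine is
	 * idle only when its QMAN (and core/CFG status, where relevant) is
	 * idle. Busy engines get their bit set in the caller's mask, and a
	 * per-engine table is emitted when a data buffer is provided.
	 */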
8091 	if (e)
8092 		hl_engine_data_sprintf(e,
8093 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8094 			"---  -------  ------------  ----------  -------------\n");
8095 
8096 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8097 		dma_id = gaudi_dma_assignment[i];
8098 		offset = dma_id * DMA_QMAN_OFFSET;
8099 
8100 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8101 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8102 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8103 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8104 				IS_DMA_IDLE(dma_core_sts0);
8105 		is_idle &= is_eng_idle;
8106 
8107 		if (mask && !is_eng_idle)
8108 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8109 		if (e)
8110 			hl_engine_data_sprintf(e, fmt, dma_id,
8111 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8112 				qm_cgm_sts, dma_core_sts0);
8113 	}
8114 
8115 	if (e)
8116 		hl_engine_data_sprintf(e,
8117 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8118 			"---  -------  ------------  ----------  ----------\n");
8119 
8120 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8121 		offset = i * TPC_QMAN_OFFSET;
8122 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8123 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8124 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8125 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8126 				IS_TPC_IDLE(tpc_cfg_sts);
8127 		is_idle &= is_eng_idle;
8128 
8129 		if (mask && !is_eng_idle)
8130 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8131 		if (e)
8132 			hl_engine_data_sprintf(e, fmt, i,
8133 				is_eng_idle ? "Y" : "N",
8134 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8135 	}
8136 
8137 	if (e)
8138 		hl_engine_data_sprintf(e,
8139 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8140 			"---  -------  ------------  ----------  -----------\n");
8141 
8142 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8143 		offset = i * MME_QMAN_OFFSET;
8144 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8145 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8146 
8147 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8148 		is_slave = i % 2;
8149 		if (!is_slave) {
8150 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8151 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8152 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8153 		}
8154 
8155 		is_idle &= is_eng_idle;
8156 
8157 		if (mask && !is_eng_idle)
8158 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8159 		if (e) {
8160 			if (!is_slave)
8161 				hl_engine_data_sprintf(e, fmt, i,
8162 					is_eng_idle ? "Y" : "N",
8163 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8164 			else
8165 				hl_engine_data_sprintf(e, mme_slave_fmt, i,
8166 					is_eng_idle ? "Y" : "N", "-",
8167 					"-", mme_arch_sts);
8168 		}
8169 	}
8170 
8171 	if (e)
8172 		hl_engine_data_sprintf(e,
8173 				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8174 				"---  -------  ------------  ----------\n");
8175 
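	/* Each NIC macro hosts two ports, each with its own QMAN (QM0/QM1),
	 * so every loop iteration checks two ports. Only ports whose HW
	 * capability bit is set are considered.
	 */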
8176 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8177 		offset = i * NIC_MACRO_QMAN_OFFSET;
8178 		port = 2 * i;
8179 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8180 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8181 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8182 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8183 			is_idle &= is_eng_idle;
8184 
8185 			if (mask && !is_eng_idle)
8186 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8187 			if (e)
8188 				hl_engine_data_sprintf(e, nic_fmt, port,
8189 						is_eng_idle ? "Y" : "N",
8190 						qm_glbl_sts0, qm_cgm_sts);
8191 		}
8192 
8193 		port = 2 * i + 1;
8194 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8195 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8196 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8197 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8198 			is_idle &= is_eng_idle;
8199 
8200 			if (mask && !is_eng_idle)
8201 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8202 			if (e)
8203 				hl_engine_data_sprintf(e, nic_fmt, port,
8204 						is_eng_idle ? "Y" : "N",
8205 						qm_glbl_sts0, qm_cgm_sts);
8206 		}
8207 	}
8208 
8209 	if (e)
8210 		hl_engine_data_sprintf(e, "\n");
8211 
8212 	return is_idle;
8213 }
8214 
8215 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8216 	__acquires(&gaudi->hw_queues_lock)
8217 {
8218 	struct gaudi_device *gaudi = hdev->asic_specific;
8219 
8220 	spin_lock(&gaudi->hw_queues_lock);
8221 }
8222 
8223 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8224 	__releases(&gaudi->hw_queues_lock)
8225 {
8226 	struct gaudi_device *gaudi = hdev->asic_specific;
8227 
8228 	spin_unlock(&gaudi->hw_queues_lock);
8229 }
8230 
8231 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8232 {
8233 	return hdev->pdev->device;
8234 }
8235 
8236 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8237 				size_t max_size)
8238 {
8239 	struct gaudi_device *gaudi = hdev->asic_specific;
8240 
8241 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8242 		return 0;
8243 
8244 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8245 }
8246 
8247 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8248 {
8249 	struct gaudi_device *gaudi = hdev->asic_specific;
8250 
8251 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8252 		return 0;
8253 
8254 	return hl_fw_get_monitor_dump(hdev, data);
8255 }
8256 
8257 /*
 * This function should be used only during initialization and/or after reset,
8259  * when there are no active users.
8260  */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8262 {
8263 	u64 kernel_timeout;
8264 	u32 status, offset;
8265 	int rc;
8266 
8267 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8268 
8269 	if (hdev->pldm)
8270 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8271 	else
8272 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8273 
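	/*
	 * Load the kernel: program its base address into the QM kernel-base,
	 * icache-base and LUT registers, set the sync object address, then
	 * invalidate and prefetch the icache. Completion of the prefetch and
	 * of the actual execution is polled below.
	 */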
8274 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8275 			lower_32_bits(tpc_kernel));
8276 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8277 			upper_32_bits(tpc_kernel));
8278 
8279 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8280 			lower_32_bits(tpc_kernel));
8281 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8282 			upper_32_bits(tpc_kernel));
8283 	/* set a valid LUT pointer, content is of no significance */
8284 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8285 			lower_32_bits(tpc_kernel));
8286 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8287 			upper_32_bits(tpc_kernel));
8288 
8289 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8290 			lower_32_bits(CFG_BASE +
8291 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8292 
8293 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8294 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8295 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8296 	/* wait a bit for the engine to start executing */
8297 	usleep_range(1000, 1500);
8298 
8299 	/* wait until engine has finished executing */
8300 	rc = hl_poll_timeout(
8301 		hdev,
8302 		mmTPC0_CFG_STATUS + offset,
8303 		status,
8304 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8305 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8306 		1000,
8307 		kernel_timeout);
8308 
8309 	if (rc) {
8310 		dev_err(hdev->dev,
8311 			"Timeout while waiting for TPC%d icache prefetch\n",
8312 			tpc_id);
8313 		return -EIO;
8314 	}
8315 
8316 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8317 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8318 
8319 	/* wait a bit for the engine to start executing */
8320 	usleep_range(1000, 1500);
8321 
8322 	/* wait until engine has finished executing */
8323 	rc = hl_poll_timeout(
8324 		hdev,
8325 		mmTPC0_CFG_STATUS + offset,
8326 		status,
8327 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8328 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8329 		1000,
8330 		kernel_timeout);
8331 
8332 	if (rc) {
8333 		dev_err(hdev->dev,
8334 			"Timeout while waiting for TPC%d vector pipe\n",
8335 			tpc_id);
8336 		return -EIO;
8337 	}
8338 
8339 	rc = hl_poll_timeout(
8340 		hdev,
8341 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8342 		status,
8343 		(status == 0),
8344 		1000,
8345 		kernel_timeout);
8346 
8347 	if (rc) {
8348 		dev_err(hdev->dev,
8349 			"Timeout while waiting for TPC%d kernel to execute\n",
8350 			tpc_id);
8351 		return -EIO;
8352 	}
8353 
8354 	return 0;
8355 }
8356 
8357 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8358 		struct hl_ctx *ctx)
8359 {
8360 	struct gaudi_device *gaudi = hdev->asic_specific;
8361 	int min_alloc_order, rc, collective_cb_size;
8362 
8363 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8364 		return 0;
8365 
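	/*
	 * The internal CB pool is a coherent host buffer, carved up by a
	 * gen_pool whose minimal allocation is one collective-wait CB. A host
	 * VA block is reserved for it and mapped through the device MMU so
	 * the engines can fetch the internal (collective) CBs from host
	 * memory.
	 */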
8366 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8367 							HOST_SPACE_INTERNAL_CB_SZ,
8368 							&hdev->internal_cb_pool_dma_addr,
8369 							GFP_KERNEL | __GFP_ZERO);
8370 
8371 	if (!hdev->internal_cb_pool_virt_addr)
8372 		return -ENOMEM;
8373 
8374 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8375 			sizeof(struct packet_fence);
8376 	min_alloc_order = ilog2(collective_cb_size);
8377 
8378 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8379 	if (!hdev->internal_cb_pool) {
8380 		dev_err(hdev->dev,
8381 			"Failed to create internal CB pool\n");
8382 		rc = -ENOMEM;
8383 		goto free_internal_cb_pool;
8384 	}
8385 
8386 	rc = gen_pool_add(hdev->internal_cb_pool,
8387 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8388 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8389 	if (rc) {
8390 		dev_err(hdev->dev,
8391 			"Failed to add memory to internal CB pool\n");
8392 		rc = -EFAULT;
8393 		goto destroy_internal_cb_pool;
8394 	}
8395 
8396 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8397 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8398 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8399 
8400 	if (!hdev->internal_cb_va_base) {
8401 		rc = -ENOMEM;
8402 		goto destroy_internal_cb_pool;
8403 	}
8404 
8405 	mutex_lock(&hdev->mmu_lock);
8406 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8407 			hdev->internal_cb_pool_dma_addr,
8408 			HOST_SPACE_INTERNAL_CB_SZ);
8409 
8410 	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8411 	mutex_unlock(&hdev->mmu_lock);
8412 
8413 	if (rc)
8414 		goto unreserve_internal_cb_pool;
8415 
8416 	return 0;
8417 
8418 unreserve_internal_cb_pool:
8419 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8420 			HOST_SPACE_INTERNAL_CB_SZ);
8421 destroy_internal_cb_pool:
8422 	gen_pool_destroy(hdev->internal_cb_pool);
8423 free_internal_cb_pool:
8424 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8425 					hdev->internal_cb_pool_dma_addr);
8426 
8427 	return rc;
8428 }
8429 
8430 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8431 		struct hl_ctx *ctx)
8432 {
8433 	struct gaudi_device *gaudi = hdev->asic_specific;
8434 
8435 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8436 		return;
8437 
8438 	mutex_lock(&hdev->mmu_lock);
8439 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8440 			HOST_SPACE_INTERNAL_CB_SZ);
8441 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8442 			HOST_SPACE_INTERNAL_CB_SZ);
8443 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8444 	mutex_unlock(&hdev->mmu_lock);
8445 
8446 	gen_pool_destroy(hdev->internal_cb_pool);
8447 
8448 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8449 					hdev->internal_cb_pool_dma_addr);
8450 }
8451 
8452 static int gaudi_ctx_init(struct hl_ctx *ctx)
8453 {
8454 	int rc;
8455 
8456 	if (ctx->asid == HL_KERNEL_ASID_ID)
8457 		return 0;
8458 
8459 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8460 	if (rc)
8461 		return rc;
8462 
8463 	rc = gaudi_restore_user_registers(ctx->hdev);
8464 	if (rc)
8465 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8466 
8467 	return rc;
8468 }
8469 
8470 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8471 {
8472 	if (ctx->asid == HL_KERNEL_ASID_ID)
8473 		return;
8474 
8475 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8476 }
8477 
8478 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8479 {
8480 	return 0;
8481 }
8482 
8483 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8484 {
8485 	return gaudi_cq_assignment[cq_idx];
8486 }
8487 
8488 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8489 {
8490 	return sizeof(struct packet_msg_short) +
8491 			sizeof(struct packet_msg_prot) * 2;
8492 }
8493 
8494 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8495 {
8496 	return sizeof(struct packet_msg_short) * 4 +
8497 			sizeof(struct packet_fence) +
8498 			sizeof(struct packet_msg_prot) * 2;
8499 }
8500 
8501 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8502 {
8503 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8504 }
8505 
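/*
 * The signal CB is a single MSG_SHORT packet that atomically adds 1 to the
 * requested sync object (W_S SOB base), with the engine-barrier bit set per
 * the caller's request.
 */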
8506 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8507 				u32 size, bool eb)
8508 {
8509 	struct hl_cb *cb = (struct hl_cb *) data;
8510 	struct packet_msg_short *pkt;
8511 	u32 value, ctl, pkt_size = sizeof(*pkt);
8512 
8513 	pkt = cb->kernel_address + size;
8514 	memset(pkt, 0, pkt_size);
8515 
8516 	/* Inc by 1, Mode ADD */
8517 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8518 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8519 
8520 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8521 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8522 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8523 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8524 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8525 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8526 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8527 
8528 	pkt->value = cpu_to_le32(value);
8529 	pkt->ctl = cpu_to_le32(ctl);
8530 
8531 	return size + pkt_size;
8532 }
8533 
8534 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8535 					u16 addr)
8536 {
8537 	u32 ctl, pkt_size = sizeof(*pkt);
8538 
8539 	memset(pkt, 0, pkt_size);
8540 
8541 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8542 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8543 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8544 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8545 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8546 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8547 
8548 	pkt->value = cpu_to_le32(value);
8549 	pkt->ctl = cpu_to_le32(ctl);
8550 
8551 	return pkt_size;
8552 }
8553 
8554 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8555 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8556 		u16 sob_val, u16 mon_id)
8557 {
8558 	u64 monitor_base;
8559 	u32 ctl, value, pkt_size = sizeof(*pkt);
8560 	u16 msg_addr_offset;
8561 	u8 mask;
8562 
8563 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8564 		dev_err(hdev->dev,
8565 			"sob_base %u (mask %#x) is not valid\n",
8566 			sob_base, sob_mask);
8567 		return 0;
8568 	}
8569 
8570 	/*
8571 	 * monitor_base should be the content of the base0 address registers,
8572 	 * so it will be added to the msg short offsets
8573 	 */
8574 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8575 
8576 	msg_addr_offset =
8577 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8578 				monitor_base;
8579 
8580 	memset(pkt, 0, pkt_size);
8581 
8582 	/* Monitor config packet: bind the monitor to a sync object */
8583 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8584 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8585 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
8587 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8588 
8589 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8590 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8591 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8592 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8593 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8594 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8595 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8596 
8597 	pkt->value = cpu_to_le32(value);
8598 	pkt->ctl = cpu_to_le32(ctl);
8599 
8600 	return pkt_size;
8601 }
8602 
8603 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8604 {
8605 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8606 
8607 	memset(pkt, 0, pkt_size);
8608 
8609 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8610 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8611 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8612 
8613 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8614 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8615 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8616 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8617 
8618 	pkt->cfg = cpu_to_le32(cfg);
8619 	pkt->ctl = cpu_to_le32(ctl);
8620 
8621 	return pkt_size;
8622 }
8623 
8624 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8625 {
8626 	u32 offset, nic_index;
8627 
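	/* For NIC queues, bit 0 of nic_index selects QM0/QM1 within the NIC
	 * macro and the remaining bits select the macro itself, hence the
	 * (nic_index >> 1) / (nic_index & 0x1) arithmetic below.
	 */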
8628 	switch (queue_id) {
8629 	case GAUDI_QUEUE_ID_DMA_0_0:
8630 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8631 		break;
8632 	case GAUDI_QUEUE_ID_DMA_0_1:
8633 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8634 		break;
8635 	case GAUDI_QUEUE_ID_DMA_0_2:
8636 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8637 		break;
8638 	case GAUDI_QUEUE_ID_DMA_0_3:
8639 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8640 		break;
8641 	case GAUDI_QUEUE_ID_DMA_1_0:
8642 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8643 		break;
8644 	case GAUDI_QUEUE_ID_DMA_1_1:
8645 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8646 		break;
8647 	case GAUDI_QUEUE_ID_DMA_1_2:
8648 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8649 		break;
8650 	case GAUDI_QUEUE_ID_DMA_1_3:
8651 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8652 		break;
8653 	case GAUDI_QUEUE_ID_DMA_5_0:
8654 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8655 		break;
8656 	case GAUDI_QUEUE_ID_DMA_5_1:
8657 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8658 		break;
8659 	case GAUDI_QUEUE_ID_DMA_5_2:
8660 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8661 		break;
8662 	case GAUDI_QUEUE_ID_DMA_5_3:
8663 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8664 		break;
8665 	case GAUDI_QUEUE_ID_TPC_7_0:
8666 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8667 		break;
8668 	case GAUDI_QUEUE_ID_TPC_7_1:
8669 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8670 		break;
8671 	case GAUDI_QUEUE_ID_TPC_7_2:
8672 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8673 		break;
8674 	case GAUDI_QUEUE_ID_TPC_7_3:
8675 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8676 		break;
8677 	case GAUDI_QUEUE_ID_NIC_0_0:
8678 	case GAUDI_QUEUE_ID_NIC_1_0:
8679 	case GAUDI_QUEUE_ID_NIC_2_0:
8680 	case GAUDI_QUEUE_ID_NIC_3_0:
8681 	case GAUDI_QUEUE_ID_NIC_4_0:
8682 	case GAUDI_QUEUE_ID_NIC_5_0:
8683 	case GAUDI_QUEUE_ID_NIC_6_0:
8684 	case GAUDI_QUEUE_ID_NIC_7_0:
8685 	case GAUDI_QUEUE_ID_NIC_8_0:
8686 	case GAUDI_QUEUE_ID_NIC_9_0:
8687 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8688 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8689 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8690 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8691 		break;
8692 	case GAUDI_QUEUE_ID_NIC_0_1:
8693 	case GAUDI_QUEUE_ID_NIC_1_1:
8694 	case GAUDI_QUEUE_ID_NIC_2_1:
8695 	case GAUDI_QUEUE_ID_NIC_3_1:
8696 	case GAUDI_QUEUE_ID_NIC_4_1:
8697 	case GAUDI_QUEUE_ID_NIC_5_1:
8698 	case GAUDI_QUEUE_ID_NIC_6_1:
8699 	case GAUDI_QUEUE_ID_NIC_7_1:
8700 	case GAUDI_QUEUE_ID_NIC_8_1:
8701 	case GAUDI_QUEUE_ID_NIC_9_1:
8702 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8703 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8704 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8705 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8706 		break;
8707 	case GAUDI_QUEUE_ID_NIC_0_2:
8708 	case GAUDI_QUEUE_ID_NIC_1_2:
8709 	case GAUDI_QUEUE_ID_NIC_2_2:
8710 	case GAUDI_QUEUE_ID_NIC_3_2:
8711 	case GAUDI_QUEUE_ID_NIC_4_2:
8712 	case GAUDI_QUEUE_ID_NIC_5_2:
8713 	case GAUDI_QUEUE_ID_NIC_6_2:
8714 	case GAUDI_QUEUE_ID_NIC_7_2:
8715 	case GAUDI_QUEUE_ID_NIC_8_2:
8716 	case GAUDI_QUEUE_ID_NIC_9_2:
8717 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8718 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8719 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8720 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8721 		break;
8722 	case GAUDI_QUEUE_ID_NIC_0_3:
8723 	case GAUDI_QUEUE_ID_NIC_1_3:
8724 	case GAUDI_QUEUE_ID_NIC_2_3:
8725 	case GAUDI_QUEUE_ID_NIC_3_3:
8726 	case GAUDI_QUEUE_ID_NIC_4_3:
8727 	case GAUDI_QUEUE_ID_NIC_5_3:
8728 	case GAUDI_QUEUE_ID_NIC_6_3:
8729 	case GAUDI_QUEUE_ID_NIC_7_3:
8730 	case GAUDI_QUEUE_ID_NIC_8_3:
8731 	case GAUDI_QUEUE_ID_NIC_9_3:
8732 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8733 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8734 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8735 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8736 		break;
8737 	default:
8738 		return -EINVAL;
8739 	}
8740 
8741 	*addr = CFG_BASE + offset;
8742 
8743 	return 0;
8744 }
8745 
8746 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8747 {
8748 	u64 monitor_base;
8749 	u32 size = 0;
8750 	u16 msg_addr_offset;
8751 
8752 	/*
8753 	 * monitor_base should be the content of the base0 address registers,
8754 	 * so it will be added to the msg short offsets
8755 	 */
8756 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8757 
8758 	/* First monitor config packet: low address of the sync */
8759 	msg_addr_offset =
8760 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8761 				monitor_base;
8762 
8763 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8764 					msg_addr_offset);
8765 
8766 	/* Second monitor config packet: high address of the sync */
8767 	msg_addr_offset =
8768 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8769 				monitor_base;
8770 
8771 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8772 					msg_addr_offset);
8773 
8774 	/*
8775 	 * Third monitor config packet: the payload, i.e. what to write when the
8776 	 * sync triggers
8777 	 */
8778 	msg_addr_offset =
8779 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8780 				monitor_base;
8781 
8782 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8783 
8784 	return size;
8785 }
8786 
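/*
 * The wait CB consists of three monitor-config MSG_SHORT packets (payload
 * address low/high and payload data), an ARM-monitor MSG_SHORT and a FENCE
 * packet. Once the sync objects reach the target value, the armed monitor
 * fires its payload write to the queue's CP_FENCE2 address, satisfying the
 * FENCE packet (fence ID 2, target value 1) and releasing the queue.
 */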
8787 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8788 				struct hl_gen_wait_properties *prop)
8789 {
8790 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8791 	void *buf = cb->kernel_address;
8792 	u64 fence_addr = 0;
8793 	u32 size = prop->size;
8794 
8795 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8796 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8797 				prop->q_idx);
8798 		return 0;
8799 	}
8800 
8801 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8802 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8803 			prop->sob_mask, prop->sob_val, prop->mon_id);
8804 	size += gaudi_add_fence_pkt(buf + size);
8805 
8806 	return size;
8807 }
8808 
8809 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8810 {
8811 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8812 
8813 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8814 		hw_sob->sob_id);
8815 
8816 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8817 			hw_sob->sob_id * 4, 0);
8818 
8819 	kref_init(&hw_sob->kref);
8820 }
8821 
8822 static u64 gaudi_get_device_time(struct hl_device *hdev)
8823 {
8824 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8825 
8826 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8827 }
8828 
8829 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8830 				u32 *block_size, u32 *block_id)
8831 {
8832 	return -EPERM;
8833 }
8834 
8835 static int gaudi_block_mmap(struct hl_device *hdev,
8836 				struct vm_area_struct *vma,
8837 				u32 block_id, u32 block_size)
8838 {
8839 	return -EPERM;
8840 }
8841 
8842 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8843 {
8844 	struct cpu_dyn_regs *dyn_regs =
8845 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8846 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8847 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8848 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8849 
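	/*
	 * Write the INTS_REGISTER event ID to the interrupt handler register
	 * (either the GIC SETSPI register or the FW-provided host-interrupts
	 * register) so the FW starts sending events to the host.
	 */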
8850 	WREG32(irq_handler_offset,
8851 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8852 }
8853 
8854 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8855 {
8856 	return -EINVAL;
8857 }
8858 
8859 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8860 {
8861 	switch (pll_idx) {
8862 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8863 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8864 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8865 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8866 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8867 	case HL_GAUDI_MME_PLL: return MME_PLL;
8868 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8869 	case HL_GAUDI_IF_PLL: return IF_PLL;
8870 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8871 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8872 	default: return -EINVAL;
8873 	}
8874 }
8875 
8876 static int gaudi_add_sync_to_engine_map_entry(
8877 	struct hl_sync_to_engine_map *map, u32 reg_value,
8878 	enum hl_sync_engine_type engine_type, u32 engine_id)
8879 {
8880 	struct hl_sync_to_engine_map_entry *entry;
8881 
	/* The register value holds a partial address of the sync object and
	 * is used as a unique identifier. For that, the CFG base bits must
	 * be cleared from the value.
	 */
8886 	if (reg_value == 0 || reg_value == 0xffffffff)
8887 		return 0;
8888 	reg_value -= lower_32_bits(CFG_BASE);
8889 
8890 	/* create a new hash entry */
8891 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8892 	if (!entry)
8893 		return -ENOMEM;
8894 	entry->engine_type = engine_type;
8895 	entry->engine_id = engine_id;
8896 	entry->sync_id = reg_value;
8897 	hash_add(map->tb, &entry->node, reg_value);
8898 
8899 	return 0;
8900 }
8901 
8902 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8903 				struct hl_sync_to_engine_map *map)
8904 {
8905 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8906 	int i, j, rc;
8907 	u32 reg_value;
8908 
8909 	/* Iterate over TPC engines */
8910 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8911 
8912 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8913 					sds->props[SP_NEXT_TPC] * i);
8914 
8915 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8916 							ENGINE_TPC, i);
8917 		if (rc)
8918 			goto free_sync_to_engine_map;
8919 	}
8920 
8921 	/* Iterate over MME engines */
8922 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8923 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8924 
8925 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8926 						sds->props[SP_NEXT_MME] * i +
8927 						j * sizeof(u32));
8928 
8929 			rc = gaudi_add_sync_to_engine_map_entry(
8930 				map, reg_value, ENGINE_MME,
8931 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8932 			if (rc)
8933 				goto free_sync_to_engine_map;
8934 		}
8935 	}
8936 
8937 	/* Iterate over DMA engines */
8938 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8939 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8940 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8941 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8942 							ENGINE_DMA, i);
8943 		if (rc)
8944 			goto free_sync_to_engine_map;
8945 	}
8946 
8947 	return 0;
8948 
8949 free_sync_to_engine_map:
8950 	hl_state_dump_free_sync_to_engine_map(map);
8951 
8952 	return rc;
8953 }
8954 
8955 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8956 {
8957 	return FIELD_GET(
8958 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8959 		mon->status);
8960 }
8961 
8962 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8963 {
8964 	const size_t max_write = 10;
8965 	u32 gid, mask, sob;
8966 	int i, offset;
8967 
	/* The sync object ID is calculated as:
	 * 8 * group_id + index of each cleared bit in the mask
	 */
8971 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8972 			mon->arm_data);
8973 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8974 			mon->arm_data);
8975 
	for (i = 0, offset = 0;
	     mask && offset < MONITOR_SOB_STRING_SIZE - max_write;
	     mask >>= 1, i++) {
8978 		if (!(mask & 1)) {
8979 			sob = gid * MONITOR_MAX_SOBS + i;
8980 
8981 			if (offset > 0)
8982 				offset += snprintf(sobs + offset, max_write,
8983 							", ");
8984 
8985 			offset += snprintf(sobs + offset, max_write, "%u", sob);
8986 		}
8987 	}
8988 }
8989 
8990 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8991 				struct hl_device *hdev,
8992 				struct hl_mon_state_dump *mon)
8993 {
8994 	const char *name;
8995 	char scratch_buf1[BIN_REG_STRING_SIZE],
8996 		scratch_buf2[BIN_REG_STRING_SIZE];
8997 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8998 
8999 	name = hl_state_dump_get_monitor_name(hdev, mon);
9000 	if (!name)
9001 		name = "";
9002 
9003 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9004 
9005 	return hl_snprintf_resize(
9006 		buf, size, offset,
9007 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9008 		mon->id, name,
9009 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9010 				mon->arm_data),
9011 		hl_format_as_binary(
9012 			scratch_buf1, sizeof(scratch_buf1),
9013 			FIELD_GET(
9014 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9015 				mon->arm_data)),
9016 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9017 				mon->arm_data),
9018 		mon->wr_data,
9019 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9020 		hl_format_as_binary(
9021 			scratch_buf2, sizeof(scratch_buf2),
9022 			FIELD_GET(
9023 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9024 				mon->status)),
9025 		monitored_sobs);
9026 }
9027 
9029 static int gaudi_print_fences_single_engine(
9030 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9031 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9032 	size_t *size, size_t *offset)
9033 {
9034 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9035 	int rc = -ENOMEM, i;
9036 	u32 *statuses, *fences;
9037 
9038 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9039 			sizeof(*statuses), GFP_KERNEL);
9040 	if (!statuses)
9041 		goto out;
9042 
9043 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9044 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
9045 			 sizeof(*fences), GFP_KERNEL);
9046 	if (!fences)
9047 		goto free_status;
9048 
	/* One CP status register per queue, matching the allocation above */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9051 
9052 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9053 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9054 		fences[i] = RREG32(base_offset + i * sizeof(u32));
9055 
	/* The actual print: for each stream with a fence in progress, compute
	 * the CFG-space address of its CP_FENCE<id>_CNT_<stream> counter and
	 * derive the matching CP_FENCE<id>_RDATA_<stream> address from the
	 * CNT/RDATA offset delta.
	 */
9057 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9058 		u32 fence_id;
9059 		u64 fence_cnt, fence_rdata;
9060 		const char *engine_name;
9061 
9062 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9063 			statuses[i]))
9064 			continue;
9065 
9066 		fence_id =
9067 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9068 		fence_cnt = base_offset + CFG_BASE +
9069 			sizeof(u32) *
9070 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9071 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9072 				sds->props[SP_FENCE0_RDATA_OFFSET];
9073 		engine_name = hl_sync_engine_to_string(engine_type);
9074 
9075 		rc = hl_snprintf_resize(
9076 			buf, size, offset,
9077 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9078 			engine_name, engine_id,
9079 			i, fence_id,
9080 			fence_cnt, engine_name, engine_id, fence_id, i,
9081 			fence_rdata, engine_name, engine_id, fence_id, i,
9082 			fences[fence_id],
9083 			statuses[i]);
9084 		if (rc)
9085 			goto free_fences;
9086 	}
9087 
9088 	rc = 0;
9089 
9090 free_fences:
9091 	kfree(fences);
9092 free_status:
9093 	kfree(statuses);
9094 out:
9095 	return rc;
9096 }
9097 
9099 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9100 	.monitor_valid = gaudi_monitor_valid,
9101 	.print_single_monitor = gaudi_print_single_monitor,
9102 	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9103 	.print_fences_single_engine = gaudi_print_fences_single_engine,
9104 };
9105 
9106 static void gaudi_state_dump_init(struct hl_device *hdev)
9107 {
9108 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9109 	int i;
9110 
9111 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9112 		hash_add(sds->so_id_to_str_tb,
9113 			&gaudi_so_id_to_str[i].node,
9114 			gaudi_so_id_to_str[i].id);
9115 
9116 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9117 		hash_add(sds->monitor_id_to_str_tb,
9118 			&gaudi_monitor_id_to_str[i].node,
9119 			gaudi_monitor_id_to_str[i].id);
9120 
9121 	sds->props = gaudi_state_dump_specs_props;
9122 
9123 	sds->sync_namager_names = gaudi_sync_manager_names;
9124 
9125 	sds->funcs = gaudi_state_dump_funcs;
9126 }
9127 
9128 static u32 *gaudi_get_stream_master_qid_arr(void)
9129 {
9130 	return gaudi_stream_master;
9131 }
9132 
9133 static int gaudi_set_dram_properties(struct hl_device *hdev)
9134 {
9135 	return 0;
9136 }
9137 
9138 static int gaudi_set_binning_masks(struct hl_device *hdev)
9139 {
9140 	return 0;
9141 }
9142 
9143 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9144 {
9145 }
9146 
9147 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9148 {
9149 	struct hl_device *hdev = dev_get_drvdata(dev);
9150 	struct cpucp_info *cpucp_info;
9151 
9152 	cpucp_info = &hdev->asic_prop.cpucp_info;
9153 
9154 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9155 }
9156 
9157 static DEVICE_ATTR_RO(infineon_ver);
9158 
9159 static struct attribute *gaudi_vrm_dev_attrs[] = {
9160 	&dev_attr_infineon_ver.attr,
9161 	NULL,
9162 };
9163 
9164 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9165 					struct attribute_group *dev_vrm_attr_grp)
9166 {
9167 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9168 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9169 }
9170 
9171 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9172 {
9173 	return 0;
9174 }
9175 
9176 static const struct hl_asic_funcs gaudi_funcs = {
9177 	.early_init = gaudi_early_init,
9178 	.early_fini = gaudi_early_fini,
9179 	.late_init = gaudi_late_init,
9180 	.late_fini = gaudi_late_fini,
9181 	.sw_init = gaudi_sw_init,
9182 	.sw_fini = gaudi_sw_fini,
9183 	.hw_init = gaudi_hw_init,
9184 	.hw_fini = gaudi_hw_fini,
9185 	.halt_engines = gaudi_halt_engines,
9186 	.suspend = gaudi_suspend,
9187 	.resume = gaudi_resume,
9188 	.mmap = gaudi_mmap,
9189 	.ring_doorbell = gaudi_ring_doorbell,
9190 	.pqe_write = gaudi_pqe_write,
9191 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9192 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9193 	.scrub_device_mem = gaudi_scrub_device_mem,
9194 	.scrub_device_dram = gaudi_scrub_device_dram,
9195 	.get_int_queue_base = gaudi_get_int_queue_base,
9196 	.test_queues = gaudi_test_queues,
9197 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9198 	.asic_dma_pool_free = gaudi_dma_pool_free,
9199 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9200 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9201 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9202 	.cs_parser = gaudi_cs_parser,
9203 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
9204 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9205 	.update_eq_ci = gaudi_update_eq_ci,
9206 	.context_switch = gaudi_context_switch,
9207 	.restore_phase_topology = gaudi_restore_phase_topology,
9208 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9209 	.add_device_attr = gaudi_add_device_attr,
9210 	.handle_eqe = gaudi_handle_eqe,
9211 	.get_events_stat = gaudi_get_events_stat,
9212 	.read_pte = gaudi_read_pte,
9213 	.write_pte = gaudi_write_pte,
9214 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9215 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9216 	.mmu_prefetch_cache_range = NULL,
9217 	.send_heartbeat = gaudi_send_heartbeat,
9218 	.debug_coresight = gaudi_debug_coresight,
9219 	.is_device_idle = gaudi_is_device_idle,
9220 	.compute_reset_late_init = gaudi_compute_reset_late_init,
9221 	.hw_queues_lock = gaudi_hw_queues_lock,
9222 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9223 	.get_pci_id = gaudi_get_pci_id,
9224 	.get_eeprom_data = gaudi_get_eeprom_data,
9225 	.get_monitor_dump = gaudi_get_monitor_dump,
9226 	.send_cpu_message = gaudi_send_cpu_message,
9227 	.pci_bars_map = gaudi_pci_bars_map,
9228 	.init_iatu = gaudi_init_iatu,
9229 	.rreg = hl_rreg,
9230 	.wreg = hl_wreg,
9231 	.halt_coresight = gaudi_halt_coresight,
9232 	.ctx_init = gaudi_ctx_init,
9233 	.ctx_fini = gaudi_ctx_fini,
9234 	.pre_schedule_cs = gaudi_pre_schedule_cs,
9235 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9236 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9237 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9238 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9239 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9240 	.gen_signal_cb = gaudi_gen_signal_cb,
9241 	.gen_wait_cb = gaudi_gen_wait_cb,
9242 	.reset_sob = gaudi_reset_sob,
9243 	.reset_sob_group = gaudi_reset_sob_group,
9244 	.get_device_time = gaudi_get_device_time,
9245 	.pb_print_security_errors = NULL,
9246 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9247 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9248 	.get_dec_base_addr = NULL,
9249 	.scramble_addr = hl_mmu_scramble_addr,
9250 	.descramble_addr = hl_mmu_descramble_addr,
9251 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9252 	.get_hw_block_id = gaudi_get_hw_block_id,
9253 	.hw_block_mmap = gaudi_block_mmap,
9254 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9255 	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9256 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9257 	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9258 	.init_firmware_loader = gaudi_init_firmware_loader,
9259 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9260 	.state_dump_init = gaudi_state_dump_init,
9261 	.get_sob_addr = gaudi_get_sob_addr,
9262 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9263 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9264 	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9265 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9266 	.access_dev_mem = hl_access_dev_mem,
9267 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9268 	.send_device_activity = gaudi_send_device_activity,
9269 	.set_dram_properties = gaudi_set_dram_properties,
9270 	.set_binning_masks = gaudi_set_binning_masks,
9271 };
9272 
9273 /**
9274  * gaudi_set_asic_funcs - set GAUDI function pointers
9275  *
9276  * @hdev: pointer to hl_device structure
9277  *
9278  */
9279 void gaudi_set_asic_funcs(struct hl_device *hdev)
9280 {
9281 	hdev->asic_funcs = &gaudi_funcs;
9282 }
9283