1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse the CB, but WREG should be
 *                      allowed because of TDMA (tensor DMA). Hence, WREG is
 *                      never secured.
45  *
 * When the driver needs to use DMA, it checks that Gaudi is idle, sets DMA
 * channel 0 to secured, executes the DMA and changes it back to non-secured.
 * Currently, the driver doesn't use DMA while compute jobs are running.
50  *
51  * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens when the device is idle)
54  *     - MMU page tables area clear (happens on init)
55  *
 * QMAN DMA 2-7, TPC, MME, NIC:
 *     - PQ is secured and is located on the host (HBM CON TPC3 bug)
 *     - CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
67 
68 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
72 
73 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
82 
83 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
84 
85 #define GAUDI_MAX_STRING_LEN		20
86 
87 #define GAUDI_CB_POOL_CB_CNT		512
88 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
89 
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
91 
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
93 
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
95 
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
97 
#define GAUDI_ARB_WDT_TIMEOUT		0xEE6B27FF /* 8 seconds */
99 
100 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
101 
102 #define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
103 
104 #define MONITOR_SOB_STRING_SIZE		256
105 
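/* The stream master queues: all four streams of the two PCI DMA channels
 * (DMA 0 and 1), which are the externally exposed queues.
 */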
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107 	GAUDI_QUEUE_ID_DMA_0_0,
108 	GAUDI_QUEUE_ID_DMA_0_1,
109 	GAUDI_QUEUE_ID_DMA_0_2,
110 	GAUDI_QUEUE_ID_DMA_0_3,
111 	GAUDI_QUEUE_ID_DMA_1_0,
112 	GAUDI_QUEUE_ID_DMA_1_1,
113 	GAUDI_QUEUE_ID_DMA_1_2,
114 	GAUDI_QUEUE_ID_DMA_1_3
115 };
116 
117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
118 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
119 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
120 		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
121 		"gaudi cpu eq"
122 };
123 
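/* Map logical DMA channels to physical DMA engines: channels 0-1 serve PCI
 * (host) DMA and channels 2-7 serve HBM DMA.
 */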
124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
125 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
126 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
127 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
128 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
129 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
130 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
131 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
132 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
133 };
134 
135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
136 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
137 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
138 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
139 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
140 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
141 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
142 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
143 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
144 };
145 
146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
147 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
148 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
149 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
150 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
151 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
152 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
153 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
154 	[PACKET_FENCE]		= sizeof(struct packet_fence),
155 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
156 	[PACKET_NOP]		= sizeof(struct packet_nop),
157 	[PACKET_STOP]		= sizeof(struct packet_stop),
158 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
159 	[PACKET_WAIT]		= sizeof(struct packet_wait),
160 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
161 };
162 
163 static inline bool validate_packet_id(enum packet_id id)
164 {
165 	switch (id) {
166 	case PACKET_WREG_32:
167 	case PACKET_WREG_BULK:
168 	case PACKET_MSG_LONG:
169 	case PACKET_MSG_SHORT:
170 	case PACKET_CP_DMA:
171 	case PACKET_REPEAT:
172 	case PACKET_MSG_PROT:
173 	case PACKET_FENCE:
174 	case PACKET_LIN_DMA:
175 	case PACKET_NOP:
176 	case PACKET_STOP:
177 	case PACKET_ARB_POINT:
178 	case PACKET_WAIT:
179 	case PACKET_LOAD_AND_EXE:
180 		return true;
181 	default:
182 		return false;
183 	}
184 }
185 
186 static const char * const
187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
188 	"tpc_address_exceed_slm",
189 	"tpc_div_by_0",
190 	"tpc_spu_mac_overflow",
191 	"tpc_spu_addsub_overflow",
192 	"tpc_spu_abs_overflow",
193 	"tpc_spu_fp_dst_nan_inf",
194 	"tpc_spu_fp_dst_denorm",
195 	"tpc_vpu_mac_overflow",
196 	"tpc_vpu_addsub_overflow",
197 	"tpc_vpu_abs_overflow",
198 	"tpc_vpu_fp_dst_nan_inf",
199 	"tpc_vpu_fp_dst_denorm",
200 	"tpc_assertions",
201 	"tpc_illegal_instruction",
202 	"tpc_pc_wrap_around",
203 	"tpc_qm_sw_err",
204 	"tpc_hbw_rresp_err",
205 	"tpc_hbw_bresp_err",
206 	"tpc_lbw_rresp_err",
207 	"tpc_lbw_bresp_err"
208 };
209 
210 static const char * const
211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
212 	"PQ AXI HBW error",
213 	"CQ AXI HBW error",
214 	"CP AXI HBW error",
215 	"CP error due to undefined OPCODE",
216 	"CP encountered STOP OPCODE",
217 	"CP AXI LBW error",
218 	"CP WRREG32 or WRBULK returned error",
219 	"N/A",
220 	"FENCE 0 inc over max value and clipped",
221 	"FENCE 1 inc over max value and clipped",
222 	"FENCE 2 inc over max value and clipped",
223 	"FENCE 3 inc over max value and clipped",
224 	"FENCE 0 dec under min value and clipped",
225 	"FENCE 1 dec under min value and clipped",
226 	"FENCE 2 dec under min value and clipped",
227 	"FENCE 3 dec under min value and clipped"
228 };
229 
230 static const char * const
231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
232 	"Choice push while full error",
233 	"Choice Q watchdog error",
234 	"MSG AXI LBW returned with error"
235 };
236 
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352 
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354 	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355 	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356 	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357 	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358 	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359 	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360 	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361 	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362 	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363 	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364 	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365 	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366 	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367 	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368 	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369 	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370 	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371 	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372 	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373 	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374 	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375 	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376 	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377 	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378 	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379 	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380 	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382 
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384 	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385 	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
386 	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387 	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388 	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389 	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390 	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391 	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392 	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393 	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394 	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396 
397 static s64 gaudi_state_dump_specs_props[] = {
398 	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399 	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400 	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401 	[SP_MON_OBJ_WR_ADDR_LOW] =
402 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403 	[SP_MON_OBJ_WR_ADDR_HIGH] =
404 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405 	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406 	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407 	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408 	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409 	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410 	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411 	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412 	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413 	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414 	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415 	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416 	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417 	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418 	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419 	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420 	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421 	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422 	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423 	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424 	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425 	[SP_FENCE0_CNT_OFFSET] =
426 		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427 	[SP_FENCE0_RDATA_OFFSET] =
428 		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429 	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430 	[SP_NUM_CORES] = 1,
431 };
432 
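/* Map every queue ID to the engine that executes it. The CPU PQ has no
 * compute engine, so it maps to GAUDI_ENGINE_ID_SIZE (i.e. no valid engine).
 */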
433 static const int gaudi_queue_id_to_engine_id[] = {
434 	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
435 	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
436 	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
437 	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
438 	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
439 	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
440 	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
441 	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
442 	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
443 	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
444 	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
445 	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
446 	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
447 	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
448 	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
449 	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
450 	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
451 	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
452 	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
453 	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
454 	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
455 	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
456 	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
457 	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
458 	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
459 	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
460 	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
461 	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
462 	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
463 };
464 
465 /* The order here is opposite to the order of the indexing in the h/w.
466  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
467  */
468 static const char * const gaudi_sync_manager_names[] = {
469 	"SYNC_MGR_E_N",
470 	"SYNC_MGR_W_N",
471 	"SYNC_MGR_E_S",
472 	"SYNC_MGR_W_S",
473 	NULL
474 };
475 
476 struct ecc_info_extract_params {
477 	u64 block_address;
478 	u32 num_memories;
479 	bool derr;
480 };
481 
482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
483 								u64 phys_addr);
484 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
485 					struct hl_cs_job *job);
486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
487 					u32 size, u64 val);
488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
489 					u32 num_regs, u32 val);
490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
491 				u32 tpc_id);
492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
493 static int gaudi_cpucp_info_get(struct hl_device *hdev);
494 static void gaudi_disable_clock_gating(struct hl_device *hdev);
495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
497 				u32 size, bool eb);
498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
499 				struct hl_gen_wait_properties *prop);
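
/* External queues (PCI DMA 0/1) act as collective masters; DMA5, TPC7 and the
 * NIC queues act as collective slaves; all other queues don't take part in
 * collective operations.
 */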
500 static inline enum hl_collective_mode
501 get_collective_mode(struct hl_device *hdev, u32 queue_id)
502 {
503 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
504 		return HL_COLLECTIVE_MASTER;
505 
506 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
507 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
508 		return HL_COLLECTIVE_SLAVE;
509 
510 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
511 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
512 		return HL_COLLECTIVE_SLAVE;
513 
514 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
515 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
516 		return HL_COLLECTIVE_SLAVE;
517 
518 	return HL_COLLECTIVE_NOT_SUPPORTED;
519 }
520 
521 static inline void set_default_power_values(struct hl_device *hdev)
522 {
523 	struct asic_fixed_properties *prop = &hdev->asic_prop;
524 
525 	if (hdev->card_type == cpucp_card_type_pmc) {
526 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
527 
528 		if (prop->fw_security_enabled)
529 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
530 		else
531 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
532 	} else {
533 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
534 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
535 	}
536 }
537 
538 static int gaudi_set_fixed_properties(struct hl_device *hdev)
539 {
540 	struct asic_fixed_properties *prop = &hdev->asic_prop;
541 	u32 num_sync_stream_queues = 0;
542 	int i;
543 
544 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
545 	prop->hw_queues_props = kcalloc(prop->max_queues,
546 			sizeof(struct hw_queue_properties),
547 			GFP_KERNEL);
548 
549 	if (!prop->hw_queues_props)
550 		return -ENOMEM;
551 
552 	for (i = 0 ; i < prop->max_queues ; i++) {
553 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
554 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
555 			prop->hw_queues_props[i].driver_only = 0;
556 			prop->hw_queues_props[i].supports_sync_stream = 1;
557 			prop->hw_queues_props[i].cb_alloc_flags =
558 				CB_ALLOC_KERNEL;
559 			num_sync_stream_queues++;
560 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
561 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
562 			prop->hw_queues_props[i].driver_only = 1;
563 			prop->hw_queues_props[i].supports_sync_stream = 0;
564 			prop->hw_queues_props[i].cb_alloc_flags =
565 				CB_ALLOC_KERNEL;
566 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
567 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
568 			prop->hw_queues_props[i].driver_only = 0;
569 			prop->hw_queues_props[i].supports_sync_stream = 0;
570 			prop->hw_queues_props[i].cb_alloc_flags =
571 				CB_ALLOC_USER;
572 
573 		}
574 		prop->hw_queues_props[i].collective_mode =
575 						get_collective_mode(hdev, i);
576 	}
577 
578 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
579 	prop->cfg_base_address = CFG_BASE;
580 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
581 	prop->host_base_address = HOST_PHYS_BASE;
582 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
583 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
584 	prop->completion_mode = HL_COMPLETION_MODE_JOB;
585 	prop->collective_first_sob = 0;
586 	prop->collective_first_mon = 0;
587 
588 	/* 2 SOBs per internal queue stream are reserved for collective */
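	/* Each group of SOBs is padded to a whole multiple of
	 * HL_MAX_SOBS_PER_MONITOR so it starts on a monitor boundary
	 * (see gaudi_collective_init()).
	 */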
589 	prop->sync_stream_first_sob =
590 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
591 			* QMAN_STREAMS * HL_RSVD_SOBS;
592 
	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
596 	prop->sync_stream_first_mon =
597 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
598 			(NUMBER_OF_EXT_HW_QUEUES * 2);
599 
600 	prop->dram_base_address = DRAM_PHYS_BASE;
601 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
602 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
603 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
604 
605 	prop->sram_base_address = SRAM_BASE_ADDR;
606 	prop->sram_size = SRAM_SIZE;
607 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
608 	prop->sram_user_base_address =
609 			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
610 
611 	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
612 	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
613 
614 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
615 	if (hdev->pldm)
616 		prop->mmu_pgt_size = 0x800000; /* 8MB */
617 	else
618 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
619 	prop->mmu_pte_size = HL_PTE_SIZE;
620 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
621 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
622 	prop->dram_page_size = PAGE_SIZE_2MB;
623 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
624 	prop->dram_supports_virtual_memory = false;
625 
626 	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
627 	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
628 	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
629 	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
630 	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
631 	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
632 	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
633 	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
634 	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
635 	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
636 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
637 	prop->pmmu.end_addr =
638 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
639 	prop->pmmu.page_size = PAGE_SIZE_4KB;
640 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
641 	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until per-MMU props are implemented */
643 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
644 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
645 
	/* PMMU and HPMMU are the same except for the page size */
647 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
648 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
649 
650 	/* shifts and masks are the same in PMMU and DMMU */
651 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
652 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
653 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
654 	prop->dmmu.page_size = PAGE_SIZE_2MB;
655 
656 	prop->cfg_size = CFG_SIZE;
657 	prop->max_asid = MAX_ASID;
658 	prop->num_of_events = GAUDI_EVENT_SIZE;
659 	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
660 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
661 
662 	set_default_power_values(hdev);
663 
664 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
665 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
666 
667 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
668 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
669 
670 	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
671 					CARD_NAME_MAX_LEN);
672 
673 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
674 
675 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
676 			prop->sync_stream_first_sob +
677 			(num_sync_stream_queues * HL_RSVD_SOBS);
678 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
679 			prop->sync_stream_first_mon +
680 			(num_sync_stream_queues * HL_RSVD_MONS);
681 
682 	prop->first_available_user_interrupt = USHRT_MAX;
683 	prop->tpc_interrupt_id = USHRT_MAX;
684 
685 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
686 		prop->first_available_cq[i] = USHRT_MAX;
687 
688 	prop->fw_cpu_boot_dev_sts0_valid = false;
689 	prop->fw_cpu_boot_dev_sts1_valid = false;
690 	prop->hard_reset_done_by_fw = false;
691 	prop->gic_interrupts_enable = true;
692 
693 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
694 
695 	prop->clk_pll_index = HL_GAUDI_MME_PLL;
696 	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
697 
698 	prop->use_get_power_for_reset_history = true;
699 
700 	prop->configurable_stop_on_err = true;
701 
702 	prop->set_max_power_on_device_init = true;
703 
704 	prop->dma_mask = 48;
705 
706 	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
707 
708 	return 0;
709 }
710 
711 static int gaudi_pci_bars_map(struct hl_device *hdev)
712 {
713 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
714 	bool is_wc[3] = {false, false, true};
715 	int rc;
716 
717 	rc = hl_pci_bars_map(hdev, name, is_wc);
718 	if (rc)
719 		return rc;
720 
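	/* The CFG BAR window starts at SPI_FLASH_BASE_ADDR (see
	 * gaudi_init_iatu()), so CFG_BASE-relative register accesses must be
	 * offset by the distance between the two.
	 */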
721 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
722 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
723 
724 	return 0;
725 }
726 
727 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
728 {
729 	struct gaudi_device *gaudi = hdev->asic_specific;
730 	struct hl_inbound_pci_region pci_region;
731 	u64 old_addr = addr;
732 	int rc;
733 
734 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
735 		return old_addr;
736 
737 	if (hdev->asic_prop.iatu_done_by_fw)
738 		return U64_MAX;
739 
740 	/* Inbound Region 2 - Bar 4 - Point to HBM */
741 	pci_region.mode = PCI_BAR_MATCH_MODE;
742 	pci_region.bar = HBM_BAR_ID;
743 	pci_region.addr = addr;
744 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
745 	if (rc)
746 		return U64_MAX;
747 
748 	if (gaudi) {
749 		old_addr = gaudi->hbm_bar_cur_addr;
750 		gaudi->hbm_bar_cur_addr = addr;
751 	}
752 
753 	return old_addr;
754 }
755 
756 static int gaudi_init_iatu(struct hl_device *hdev)
757 {
758 	struct hl_inbound_pci_region inbound_region;
759 	struct hl_outbound_pci_region outbound_region;
760 	int rc;
761 
762 	if (hdev->asic_prop.iatu_done_by_fw)
763 		return 0;
764 
765 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
766 	inbound_region.mode = PCI_BAR_MATCH_MODE;
767 	inbound_region.bar = SRAM_BAR_ID;
768 	inbound_region.addr = SRAM_BASE_ADDR;
769 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
770 	if (rc)
771 		goto done;
772 
773 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
774 	inbound_region.mode = PCI_BAR_MATCH_MODE;
775 	inbound_region.bar = CFG_BAR_ID;
776 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
777 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
778 	if (rc)
779 		goto done;
780 
781 	/* Inbound Region 2 - Bar 4 - Point to HBM */
782 	inbound_region.mode = PCI_BAR_MATCH_MODE;
783 	inbound_region.bar = HBM_BAR_ID;
784 	inbound_region.addr = DRAM_PHYS_BASE;
785 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
786 	if (rc)
787 		goto done;
788 
789 	/* Outbound Region 0 - Point to Host */
790 	outbound_region.addr = HOST_PHYS_BASE;
791 	outbound_region.size = HOST_PHYS_SIZE;
792 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
793 
794 done:
795 	return rc;
796 }
797 
798 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
799 {
800 	return RREG32(mmHW_STATE);
801 }
802 
803 static int gaudi_early_init(struct hl_device *hdev)
804 {
805 	struct asic_fixed_properties *prop = &hdev->asic_prop;
806 	struct pci_dev *pdev = hdev->pdev;
807 	resource_size_t pci_bar_size;
808 	u32 fw_boot_status;
809 	int rc;
810 
811 	rc = gaudi_set_fixed_properties(hdev);
812 	if (rc) {
813 		dev_err(hdev->dev, "Failed setting fixed properties\n");
814 		return rc;
815 	}
816 
817 	/* Check BAR sizes */
818 	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
819 
820 	if (pci_bar_size != SRAM_BAR_SIZE) {
821 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
822 			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
823 		rc = -ENODEV;
824 		goto free_queue_props;
825 	}
826 
827 	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
828 
829 	if (pci_bar_size != CFG_BAR_SIZE) {
830 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
831 			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
832 		rc = -ENODEV;
833 		goto free_queue_props;
834 	}
835 
836 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
837 	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
838 
839 	/* If FW security is enabled at this point it means no access to ELBI */
840 	if (hdev->asic_prop.fw_security_enabled) {
841 		hdev->asic_prop.iatu_done_by_fw = true;
842 
		/*
		 * The GIC security bit can ONLY be set by CPUCP, so at this
		 * stage the decision can only be taken based on PCI ID security.
		 */
847 		hdev->asic_prop.gic_interrupts_enable = false;
848 		goto pci_init;
849 	}
850 
851 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
852 				&fw_boot_status);
853 	if (rc)
854 		goto free_queue_props;
855 
856 	/* Check whether FW is configuring iATU */
857 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
858 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
859 		hdev->asic_prop.iatu_done_by_fw = true;
860 
861 pci_init:
862 	rc = hl_pci_init(hdev);
863 	if (rc)
864 		goto free_queue_props;
865 
	/* Before continuing with the initialization, we need to read the preboot
	 * version to determine whether we are running with security-enabled firmware
	 */
869 	rc = hl_fw_read_preboot_status(hdev);
870 	if (rc) {
871 		if (hdev->reset_on_preboot_fail)
872 			/* we are already on failure flow, so don't check if hw_fini fails. */
873 			hdev->asic_funcs->hw_fini(hdev, true, false);
874 		goto pci_fini;
875 	}
876 
877 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
878 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
879 		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
880 		if (rc) {
881 			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
882 			goto pci_fini;
883 		}
884 	}
885 
886 	return 0;
887 
888 pci_fini:
889 	hl_pci_fini(hdev);
890 free_queue_props:
891 	kfree(hdev->asic_prop.hw_queues_props);
892 	return rc;
893 }
894 
895 static int gaudi_early_fini(struct hl_device *hdev)
896 {
897 	kfree(hdev->asic_prop.hw_queues_props);
898 	hl_pci_fini(hdev);
899 
900 	return 0;
901 }
902 
/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 * Return: 0 for success, negative value for error.
 */
909 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
910 {
911 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
912 	struct asic_fixed_properties *prop = &hdev->asic_prop;
913 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
914 	int rc;
915 
916 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
917 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
918 		struct gaudi_device *gaudi = hdev->asic_specific;
919 
920 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
921 			return 0;
922 
923 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
924 
925 		if (rc)
926 			return rc;
927 
928 		freq = pll_freq_arr[2];
929 	} else {
930 		/* Backward compatibility */
931 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
932 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
933 		nr = RREG32(mmPSOC_CPU_PLL_NR);
934 		nf = RREG32(mmPSOC_CPU_PLL_NF);
935 		od = RREG32(mmPSOC_CPU_PLL_OD);
936 
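		/* PLL output = ref_clk * (NF + 1) / ((NR + 1) * (OD + 1));
		 * DIV_SEL selects the raw or divided reference clock, or the
		 * raw or divided PLL clock.
		 */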
937 		if (div_sel == DIV_SEL_REF_CLK ||
938 				div_sel == DIV_SEL_DIVIDED_REF) {
939 			if (div_sel == DIV_SEL_REF_CLK)
940 				freq = PLL_REF_CLK;
941 			else
942 				freq = PLL_REF_CLK / (div_fctr + 1);
943 		} else if (div_sel == DIV_SEL_PLL_CLK ||
944 			div_sel == DIV_SEL_DIVIDED_PLL) {
945 			pll_clk = PLL_REF_CLK * (nf + 1) /
946 					((nr + 1) * (od + 1));
947 			if (div_sel == DIV_SEL_PLL_CLK)
948 				freq = pll_clk;
949 			else
950 				freq = pll_clk / (div_fctr + 1);
951 		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x\n", div_sel);
953 			freq = 0;
954 		}
955 	}
956 
957 	prop->psoc_timestamp_frequency = freq;
958 	prop->psoc_pci_pll_nr = nr;
959 	prop->psoc_pci_pll_nf = nf;
960 	prop->psoc_pci_pll_od = od;
961 	prop->psoc_pci_pll_div_factor = div_fctr;
962 
963 	return 0;
964 }
965 
966 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
967 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
968 {
969 	struct asic_fixed_properties *prop = &hdev->asic_prop;
970 	struct packet_lin_dma *init_tpc_mem_pkt;
971 	struct hl_cs_job *job;
972 	struct hl_cb *cb;
973 	u64 dst_addr;
974 	u32 cb_size, ctl;
975 	u8 tpc_id;
976 	int rc;
977 
978 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
979 	if (!cb)
980 		return -EFAULT;
981 
982 	init_tpc_mem_pkt = cb->kernel_address;
983 	cb_size = sizeof(*init_tpc_mem_pkt);
984 	memset(init_tpc_mem_pkt, 0, cb_size);
985 
986 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
987 
988 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
989 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
990 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
991 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
992 
993 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
994 
995 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
996 
997 	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
998 	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
999 				round_up(prop->sram_user_base_address, SZ_8K));
1000 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
1001 
1002 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
1003 	if (!job) {
1004 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1005 		rc = -ENOMEM;
1006 		goto release_cb;
1007 	}
1008 
1009 	job->id = 0;
1010 	job->user_cb = cb;
1011 	atomic_inc(&job->user_cb->cs_cnt);
1012 	job->user_cb_size = cb_size;
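	/* Submit on PCI DMA channel 0, which the driver temporarily secures
	 * for its own DMA jobs (see the security scheme note at the top of
	 * this file).
	 */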
1013 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1014 	job->patched_cb = job->user_cb;
1015 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1016 
1017 	hl_debugfs_add_job(hdev, job);
1018 
1019 	rc = gaudi_send_job_on_qman0(hdev, job);
1020 
1021 	if (rc)
1022 		goto free_job;
1023 
1024 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1025 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1026 		if (rc)
1027 			break;
1028 	}
1029 
1030 free_job:
1031 	hl_userptr_delete_list(hdev, &job->userptr_list);
1032 	hl_debugfs_remove_job(hdev, job);
1033 	kfree(job);
1034 	atomic_dec(&cb->cs_cnt);
1035 
1036 release_cb:
1037 	hl_cb_put(cb);
1038 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1039 
1040 	return rc;
1041 }
1042 
1043 /*
1044  * gaudi_init_tpc_mem() - Initialize TPC memories.
1045  * @hdev: Pointer to hl_device structure.
1046  *
1047  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1048  *
1049  * Return: 0 for success, negative value for error.
1050  */
1051 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1052 {
1053 	const struct firmware *fw;
1054 	size_t fw_size;
1055 	void *cpu_addr;
1056 	dma_addr_t dma_handle;
1057 	int rc, count = 5;
1058 
1059 again:
1060 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1061 	if (rc == -EINTR && count-- > 0) {
1062 		msleep(50);
1063 		goto again;
1064 	}
1065 
1066 	if (rc) {
1067 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1068 				GAUDI_TPC_FW_FILE);
1069 		goto out;
1070 	}
1071 
1072 	fw_size = fw->size;
1073 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1074 	if (!cpu_addr) {
1075 		dev_err(hdev->dev,
1076 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1077 			fw_size);
1078 		rc = -ENOMEM;
1079 		goto out;
1080 	}
1081 
1082 	memcpy(cpu_addr, fw->data, fw_size);
1083 
1084 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1085 
	hl_asic_dma_free_coherent(hdev, fw_size, cpu_addr, dma_handle);
1087 
1088 out:
1089 	release_firmware(fw);
1090 	return rc;
1091 }
1092 
1093 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1094 {
1095 	struct gaudi_device *gaudi = hdev->asic_specific;
1096 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1097 	struct hl_hw_queue *q;
1098 	u32 i, sob_id, sob_group_id, queue_id;
1099 
1100 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1101 	sob_group_id =
1102 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1103 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1104 
1105 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1106 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
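		/* Each NIC engine owns 4 consecutive queue IDs (one per
		 * stream), so stepping by 4 stays on the same stream of the
		 * next engine.
		 */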
1107 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1108 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1109 	}
1110 
	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
1114 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1115 	q = &hdev->kernel_queues[queue_id];
1116 	q->sync_stream_prop.collective_sob_id =
1117 			sob_id + NIC_NUMBER_OF_ENGINES;
1118 
1119 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1120 	q = &hdev->kernel_queues[queue_id];
1121 	q->sync_stream_prop.collective_sob_id =
1122 			sob_id + NIC_NUMBER_OF_ENGINES;
1123 }
1124 
1125 static void gaudi_sob_group_hw_reset(struct kref *ref)
1126 {
1127 	struct gaudi_hw_sob_group *hw_sob_group =
1128 		container_of(ref, struct gaudi_hw_sob_group, kref);
1129 	struct hl_device *hdev = hw_sob_group->hdev;
1130 	int i;
1131 
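	/* Clear all SOBs in the group; each SOB is a 4-byte register */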
1132 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1133 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1134 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1135 
1136 	kref_init(&hw_sob_group->kref);
1137 }
1138 
1139 static void gaudi_sob_group_reset_error(struct kref *ref)
1140 {
1141 	struct gaudi_hw_sob_group *hw_sob_group =
1142 		container_of(ref, struct gaudi_hw_sob_group, kref);
1143 	struct hl_device *hdev = hw_sob_group->hdev;
1144 
1145 	dev_crit(hdev->dev,
1146 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1147 		hw_sob_group->base_sob_id);
1148 }
1149 
1150 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1151 {
1152 	struct gaudi_collective_properties *prop;
1153 	int i;
1154 
1155 	prop = &gaudi->collective_props;
1156 
1157 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1158 
1159 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1160 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1161 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1162 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1163 	/* Set collective engine bit */
1164 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1165 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1166 }
1167 
1168 static int gaudi_collective_init(struct hl_device *hdev)
1169 {
1170 	u32 i, sob_id, reserved_sobs_per_group;
1171 	struct gaudi_collective_properties *prop;
1172 	struct gaudi_device *gaudi;
1173 
1174 	gaudi = hdev->asic_specific;
1175 	prop = &gaudi->collective_props;
1176 	sob_id = hdev->asic_prop.collective_first_sob;
1177 
1178 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1179 	reserved_sobs_per_group =
1180 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1181 
1182 	/* Init SOB groups */
1183 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1184 		prop->hw_sob_group[i].hdev = hdev;
1185 		prop->hw_sob_group[i].base_sob_id = sob_id;
1186 		sob_id += reserved_sobs_per_group;
1187 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1188 	}
1189 
1190 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1191 		prop->next_sob_group_val[i] = 1;
1192 		prop->curr_sob_group_idx[i] = 0;
1193 		gaudi_collective_map_sobs(hdev, i);
1194 	}
1195 
1196 	gaudi_collective_mstr_sob_mask_set(gaudi);
1197 
1198 	return 0;
1199 }
1200 
1201 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1202 {
1203 	struct gaudi_device *gaudi = hdev->asic_specific;
1204 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1205 
1206 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1207 					gaudi_sob_group_hw_reset);
1208 }
1209 
1210 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1211 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1212 {
1213 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1214 	struct gaudi_collective_properties *cprop;
1215 	struct hl_gen_wait_properties wait_prop;
1216 	struct hl_sync_stream_properties *prop;
1217 	struct gaudi_device *gaudi;
1218 
1219 	gaudi = hdev->asic_specific;
1220 	cprop = &gaudi->collective_props;
1221 	queue_id = job->hw_queue_id;
1222 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1223 
1224 	master_sob_base =
1225 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1226 	master_monitor = prop->collective_mstr_mon_id[0];
1227 
1228 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1229 
1230 	dev_dbg(hdev->dev,
1231 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1232 		master_sob_base, cprop->mstr_sob_mask[0],
1233 		cprop->next_sob_group_val[stream],
1234 		master_monitor, queue_id);
1235 
1236 	wait_prop.data = (void *) job->patched_cb;
1237 	wait_prop.sob_base = master_sob_base;
1238 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1239 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1240 	wait_prop.mon_id = master_monitor;
1241 	wait_prop.q_idx = queue_id;
1242 	wait_prop.size = cb_size;
1243 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1244 
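	/* The second monitor covers the SOBs beyond the first monitor's
	 * HL_MAX_SOBS_PER_MONITOR window: NICs 8-9 and the reduction engine
	 * (see gaudi_collective_wait_create_jobs()).
	 */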
1245 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1246 	master_monitor = prop->collective_mstr_mon_id[1];
1247 
1248 	dev_dbg(hdev->dev,
1249 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1250 		master_sob_base, cprop->mstr_sob_mask[1],
1251 		cprop->next_sob_group_val[stream],
1252 		master_monitor, queue_id);
1253 
1254 	wait_prop.sob_base = master_sob_base;
1255 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1256 	wait_prop.mon_id = master_monitor;
1257 	wait_prop.size = cb_size;
1258 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1259 }
1260 
1261 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1262 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1263 {
1264 	struct hl_gen_wait_properties wait_prop;
1265 	struct hl_sync_stream_properties *prop;
1266 	u32 queue_id, cb_size = 0;
1267 
1268 	queue_id = job->hw_queue_id;
1269 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1270 
1271 	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
1276 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1277 						cs_cmpl);
1278 
1279 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1280 				job->cs->sequence,
1281 				cs_cmpl->hw_sob->sob_id,
1282 				cs_cmpl->sob_val);
1283 	}
1284 
1285 	/* Add to wait CBs using slave monitor */
1286 	wait_prop.data = (void *) job->user_cb;
1287 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1288 	wait_prop.sob_mask = 0x1;
1289 	wait_prop.sob_val = cs_cmpl->sob_val;
1290 	wait_prop.mon_id = prop->collective_slave_mon_id;
1291 	wait_prop.q_idx = queue_id;
1292 	wait_prop.size = cb_size;
1293 
1294 	dev_dbg(hdev->dev,
1295 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1296 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1297 		prop->collective_slave_mon_id, queue_id);
1298 
1299 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1300 
1301 	dev_dbg(hdev->dev,
1302 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1303 		prop->collective_sob_id, queue_id);
1304 
1305 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1306 			prop->collective_sob_id, cb_size, false);
1307 }
1308 
1309 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1310 {
1311 	struct hl_cs_compl *signal_cs_cmpl =
1312 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1313 	struct hl_cs_compl *cs_cmpl =
1314 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1315 	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1316 	struct gaudi_collective_properties *cprop;
1317 	u32 stream, queue_id, sob_group_offset;
1318 	struct gaudi_device *gaudi;
1319 	struct hl_device *hdev;
1320 	struct hl_cs_job *job;
1321 	struct hl_ctx *ctx;
1322 
1323 	ctx = cs->ctx;
1324 	hdev = ctx->hdev;
1325 	gaudi = hdev->asic_specific;
1326 	cprop = &gaudi->collective_props;
1327 
1328 	if (cs->encaps_signals) {
1329 		cs_cmpl->hw_sob = handle->hw_sob;
		/* At this checkpoint we only need the hw_sob pointer
		 * for the completion check before starting to go over the
		 * jobs of the master/slaves. The sob_value will be taken
		 * later on in gaudi_collective_slave_init_job(), depending
		 * on each job's wait offset value.
		 */
1336 		cs_cmpl->sob_val = 0;
1337 	} else {
1338 		/* copy the SOB id and value of the signal CS */
1339 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1340 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1341 	}
1342 
	/* Check again if the signal CS has already completed.
	 * If it has, don't send any wait CS since the hw_sob
	 * could already be in reset. If the signal has not completed,
	 * take a refcount on the hw_sob to prevent resetting it
	 * while the wait CS is not yet submitted.
	 * Note that this check is protected by two locks:
	 * the hw queue lock and the completion object lock.
	 * The same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw queue lock prevents the hw_sob refcount value,
	 * which is changed by the signal/wait flows, from going
	 * out of sync.
	 */
1355 	spin_lock(&signal_cs_cmpl->lock);
1356 
1357 	if (completion_done(&cs->signal_fence->completion)) {
1358 		spin_unlock(&signal_cs_cmpl->lock);
1359 		return -EINVAL;
1360 	}
1361 	/* Increment kref since all slave queues are now waiting on it */
1362 	kref_get(&cs_cmpl->hw_sob->kref);
1363 
1364 	spin_unlock(&signal_cs_cmpl->lock);
1365 
1366 	/* Calculate the stream from collective master queue (1st job) */
1367 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
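	/* Every QMAN exposes 4 streams, so the stream index is the queue ID
	 * modulo 4.
	 */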
1368 	stream = job->hw_queue_id % 4;
1369 	sob_group_offset =
1370 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1371 
1372 	list_for_each_entry(job, &cs->job_list, cs_node) {
1373 		queue_id = job->hw_queue_id;
1374 
1375 		if (hdev->kernel_queues[queue_id].collective_mode ==
1376 				HL_COLLECTIVE_MASTER)
1377 			gaudi_collective_master_init_job(hdev, job, stream,
1378 						sob_group_offset);
1379 		else
1380 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1381 	}
1382 
1383 	cs_cmpl->sob_group = sob_group_offset;
1384 
1385 	/* Handle sob group kref and wraparound */
1386 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1387 	cprop->next_sob_group_val[stream]++;
1388 
1389 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1390 		/*
1391 		 * Decrement as we reached the max value.
1392 		 * The release function won't be called here as we've
1393 		 * just incremented the refcount.
1394 		 */
1395 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1396 				gaudi_sob_group_reset_error);
1397 		cprop->next_sob_group_val[stream] = 1;
1398 		/* only two SOBs are currently in use */
1399 		cprop->curr_sob_group_idx[stream] =
1400 			(cprop->curr_sob_group_idx[stream] + 1) &
1401 							(HL_RSVD_SOBS - 1);
1402 
1403 		gaudi_collective_map_sobs(hdev, stream);
1404 
1405 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1406 				cprop->curr_sob_group_idx[stream], stream);
1407 	}
1408 
1409 	mb();
1410 	hl_fence_put(cs->signal_fence);
1411 	cs->signal_fence = NULL;
1412 
1413 	return 0;
1414 }
1415 
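/* Extra space needed in a patched CB for the two trailing MSG_PROT packets
 * (completion and MSI). If they don't fit in the tail of the last cache line,
 * the CB is first padded up to the cache-line boundary.
 */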
1416 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1417 {
1418 	u32 cacheline_end, additional_commands;
1419 
1420 	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1421 	additional_commands = sizeof(struct packet_msg_prot) * 2;
1422 
1423 	if (user_cb_size + additional_commands > cacheline_end)
1424 		return cacheline_end - user_cb_size + additional_commands;
1425 	else
1426 		return additional_commands;
1427 }
1428 
1429 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1430 		struct hl_ctx *ctx, struct hl_cs *cs,
1431 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1432 		u32 encaps_signal_offset)
1433 {
1434 	struct hw_queue_properties *hw_queue_prop;
1435 	struct hl_cs_counters_atomic *cntr;
1436 	struct hl_cs_job *job;
1437 	struct hl_cb *cb;
1438 	u32 cb_size;
1439 	bool patched_cb;
1440 
1441 	cntr = &hdev->aggregated_cs_counters;
1442 
1443 	if (mode == HL_COLLECTIVE_MASTER) {
1444 		/* CB size of collective master queue contains
1445 		 * 4 msg short packets for monitor 1 configuration
1446 		 * 1 fence packet
1447 		 * 4 msg short packets for monitor 2 configuration
1448 		 * 1 fence packet
1449 		 * 2 msg prot packets for completion and MSI
1450 		 */
1451 		cb_size = sizeof(struct packet_msg_short) * 8 +
1452 				sizeof(struct packet_fence) * 2 +
1453 				sizeof(struct packet_msg_prot) * 2;
1454 		patched_cb = true;
1455 	} else {
1456 		/* CB size of collective slave queues contains
1457 		 * 4 msg short packets for monitor configuration
1458 		 * 1 fence packet
1459 		 * 1 additional msg short packet for sob signal
1460 		 */
1461 		cb_size = sizeof(struct packet_msg_short) * 5 +
1462 				sizeof(struct packet_fence);
1463 		patched_cb = false;
1464 	}
1465 
1466 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1467 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1468 	if (!job) {
1469 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1470 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1471 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1472 		return -ENOMEM;
1473 	}
1474 
1475 	/* Allocate internal mapped CB for non patched CBs */
1476 	cb = hl_cb_kernel_create(hdev, cb_size,
1477 			hdev->mmu_enable && !patched_cb);
1478 	if (!cb) {
1479 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1480 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1481 		kfree(job);
1482 		return -EFAULT;
1483 	}
1484 
1485 	job->id = 0;
1486 	job->cs = cs;
1487 	job->user_cb = cb;
1488 	atomic_inc(&job->user_cb->cs_cnt);
1489 	job->user_cb_size = cb_size;
1490 	job->hw_queue_id = queue_id;
1491 
	/* Since a collective wait CS is guaranteed to have only one chunk,
	 * we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
1496 	if (cs->encaps_signals)
1497 		job->encaps_sig_wait_offset = encaps_signal_offset;
1498 
	/*
	 * No need for parsing, the user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons: we don't need
	 * the CB in the CB idr anymore, and we want to decrement its
	 * refcount as it was incremented inside hl_cb_kernel_create().
	 */
1505 	if (patched_cb)
1506 		job->patched_cb = job->user_cb;
1507 	else
1508 		job->patched_cb = NULL;
1509 
1510 	job->job_cb_size = job->user_cb_size;
1511 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1512 
1513 	/* increment refcount as for external queues we get completion */
1514 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1515 		cs_get(cs);
1516 
1517 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1518 
1519 	list_add_tail(&job->cs_node, &cs->job_list);
1520 
1521 	hl_debugfs_add_job(hdev, job);
1522 
1523 	return 0;
1524 }
1525 
1526 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1527 		struct hl_ctx *ctx, struct hl_cs *cs,
1528 		u32 wait_queue_id, u32 collective_engine_id,
1529 		u32 encaps_signal_offset)
1530 {
1531 	struct gaudi_device *gaudi = hdev->asic_specific;
1532 	struct hw_queue_properties *hw_queue_prop;
1533 	u32 queue_id, collective_queue, num_jobs;
1534 	u32 stream, nic_queue, nic_idx = 0;
1535 	bool skip;
1536 	int i, rc = 0;
1537 
1538 	/* Verify wait queue id is configured as master */
1539 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1541 		dev_err(hdev->dev,
1542 			"Queue %d is not configured as collective master\n",
1543 			wait_queue_id);
1544 		return -EINVAL;
1545 	}
1546 
1547 	/* Verify engine id is supported */
1548 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1549 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1550 		dev_err(hdev->dev,
1551 			"Collective wait does not support engine %u\n",
1552 			collective_engine_id);
1553 		return -EINVAL;
1554 	}
1555 
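	/*
	 * Each QMAN exposes 4 streams, so derive the stream index from the
	 * wait queue id and use the same stream for the reduction engine and
	 * NIC queues below.
	 */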
1556 	stream = wait_queue_id % 4;
1557 
1558 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1559 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1560 	else
1561 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1562 
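	/* One job for the collective master plus NUMBER_OF_SOBS_IN_GRP slave jobs */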
1563 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1564 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1565 
	/* The first job goes to the collective master queue; it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * the first monitor for NICs 0-7, the second monitor for NICs 8-9 and
	 * the reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues, which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
1575 	for (i = 0 ; i < num_jobs ; i++) {
1576 		if (i == 0) {
1577 			queue_id = wait_queue_id;
1578 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1579 				HL_COLLECTIVE_MASTER, queue_id,
1580 				wait_queue_id, encaps_signal_offset);
1581 		} else {
1582 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1583 				if (gaudi->hw_cap_initialized &
1584 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1585 					skip = false;
1586 				else
1587 					skip = true;
1588 
1589 				queue_id = nic_queue;
1590 				nic_queue += 4;
1591 				nic_idx++;
1592 
1593 				if (skip)
1594 					continue;
1595 			} else {
1596 				queue_id = collective_queue;
1597 			}
1598 
1599 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1600 				HL_COLLECTIVE_SLAVE, queue_id,
1601 				wait_queue_id, encaps_signal_offset);
1602 		}
1603 
1604 		if (rc)
1605 			return rc;
1606 	}
1607 
1608 	return rc;
1609 }
1610 
1611 static int gaudi_late_init(struct hl_device *hdev)
1612 {
1613 	struct gaudi_device *gaudi = hdev->asic_specific;
1614 	int rc;
1615 
1616 	rc = gaudi->cpucp_info_get(hdev);
1617 	if (rc) {
1618 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1619 		return rc;
1620 	}
1621 
1622 	if ((hdev->card_type == cpucp_card_type_pci) &&
1623 			(hdev->nic_ports_mask & 0x3)) {
1624 		dev_info(hdev->dev,
1625 			"PCI card detected, only 8 ports are enabled\n");
1626 		hdev->nic_ports_mask &= ~0x3;
1627 
1628 		/* Stop and disable unused NIC QMANs */
1629 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1630 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1631 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1632 
1633 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1634 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1635 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1636 
1637 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1638 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1639 
1640 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1641 	}
1642 
1643 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1644 	if (rc) {
1645 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1646 		return rc;
1647 	}
1648 
1649 	/* Scrub both SRAM and DRAM */
1650 	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1651 	if (rc)
1652 		goto disable_pci_access;
1653 
1654 	rc = gaudi_fetch_psoc_frequency(hdev);
1655 	if (rc) {
1656 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1657 		goto disable_pci_access;
1658 	}
1659 
1660 	rc = gaudi_mmu_clear_pgt_range(hdev);
1661 	if (rc) {
1662 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1663 		goto disable_pci_access;
1664 	}
1665 
1666 	rc = gaudi_init_tpc_mem(hdev);
1667 	if (rc) {
1668 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1669 		goto disable_pci_access;
1670 	}
1671 
1672 	rc = gaudi_collective_init(hdev);
1673 	if (rc) {
1674 		dev_err(hdev->dev, "Failed to init collective\n");
1675 		goto disable_pci_access;
1676 	}
1677 
1678 	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID once during device initialization with the fixed value of 1
1680 	 */
1681 	gaudi_mmu_prepare(hdev, 1);
1682 
1683 	hl_fw_set_pll_profile(hdev);
1684 
1685 	return 0;
1686 
1687 disable_pci_access:
1688 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1689 
1690 	return rc;
1691 }
1692 
1693 static void gaudi_late_fini(struct hl_device *hdev)
1694 {
1695 	hl_hwmon_release_resources(hdev);
1696 }
1697 
1698 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1699 {
1700 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1701 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1702 	int i, j, rc = 0;
1703 
	/*
	 * The device CPU works with 40-bit addresses, and bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform the extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the entire allocated range.
	 */
1712 
1713 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1714 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1715 								&dma_addr_arr[i],
1716 								GFP_KERNEL | __GFP_ZERO);
1717 		if (!virt_addr_arr[i]) {
1718 			rc = -ENOMEM;
1719 			goto free_dma_mem_arr;
1720 		}
1721 
1722 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1723 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1724 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1725 			break;
1726 	}
1727 
1728 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1729 		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory is not identical across the entire range\n");
1731 		rc = -EFAULT;
1732 		goto free_dma_mem_arr;
1733 	}
1734 
1735 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1736 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1737 	hdev->cpu_pci_msb_addr =
1738 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1739 
1740 	if (!hdev->asic_prop.fw_security_enabled)
1741 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1742 
1743 free_dma_mem_arr:
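	/*
	 * Free every allocation except the one that was kept (index i); on the
	 * error paths nothing was kept, so all attempts are released.
	 */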
1744 	for (j = 0 ; j < i ; j++)
1745 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1746 						dma_addr_arr[j]);
1747 
1748 	return rc;
1749 }
1750 
1751 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1752 {
1753 	struct gaudi_device *gaudi = hdev->asic_specific;
1754 	struct gaudi_internal_qman_info *q;
1755 	u32 i;
1756 
1757 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1758 		q = &gaudi->internal_qmans[i];
1759 		if (!q->pq_kernel_addr)
1760 			continue;
1761 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1762 	}
1763 }
1764 
1765 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1766 {
1767 	struct gaudi_device *gaudi = hdev->asic_specific;
1768 	struct gaudi_internal_qman_info *q;
1769 	int rc, i;
1770 
1771 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1772 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1773 			continue;
1774 
1775 		q = &gaudi->internal_qmans[i];
1776 
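		/* The PQ size depends on the engine type behind this queue */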
1777 		switch (i) {
1778 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1779 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1780 			break;
1781 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1782 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1783 			break;
1784 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1785 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1786 			break;
1787 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1788 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1789 			break;
1790 		default:
1791 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1792 			rc = -EINVAL;
1793 			goto free_internal_qmans_pq_mem;
1794 		}
1795 
1796 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1797 								GFP_KERNEL | __GFP_ZERO);
1798 		if (!q->pq_kernel_addr) {
1799 			rc = -ENOMEM;
1800 			goto free_internal_qmans_pq_mem;
1801 		}
1802 	}
1803 
1804 	return 0;
1805 
1806 free_internal_qmans_pq_mem:
1807 	gaudi_free_internal_qmans_pq_mem(hdev);
1808 	return rc;
1809 }
1810 
1811 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1812 {
1813 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1814 	struct pci_mem_region *region;
1815 
1816 	/* CFG */
1817 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1818 	region->region_base = CFG_BASE;
1819 	region->region_size = CFG_SIZE;
1820 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1821 	region->bar_size = CFG_BAR_SIZE;
1822 	region->bar_id = CFG_BAR_ID;
1823 	region->used = 1;
1824 
1825 	/* SRAM */
1826 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1827 	region->region_base = SRAM_BASE_ADDR;
1828 	region->region_size = SRAM_SIZE;
1829 	region->offset_in_bar = 0;
1830 	region->bar_size = SRAM_BAR_SIZE;
1831 	region->bar_id = SRAM_BAR_ID;
1832 	region->used = 1;
1833 
1834 	/* DRAM */
1835 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1836 	region->region_base = DRAM_PHYS_BASE;
1837 	region->region_size = hdev->asic_prop.dram_size;
1838 	region->offset_in_bar = 0;
1839 	region->bar_size = prop->dram_pci_bar_size;
1840 	region->bar_id = HBM_BAR_ID;
1841 	region->used = 1;
1842 
1843 	/* SP SRAM */
1844 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1845 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1846 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1847 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1848 	region->bar_size = CFG_BAR_SIZE;
1849 	region->bar_id = CFG_BAR_ID;
1850 	region->used = 1;
1851 }
1852 
1853 static int gaudi_sw_init(struct hl_device *hdev)
1854 {
1855 	struct gaudi_device *gaudi;
1856 	u32 i, event_id = 0;
1857 	int rc;
1858 
1859 	/* Allocate device structure */
1860 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1861 	if (!gaudi)
1862 		return -ENOMEM;
1863 
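	/* Build the driver's event array from all valid entries in the IRQ map table */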
1864 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1865 		if (gaudi_irq_map_table[i].valid) {
1866 			if (event_id == GAUDI_EVENT_SIZE) {
1867 				dev_err(hdev->dev,
1868 					"Event array exceeds the limit of %u events\n",
1869 					GAUDI_EVENT_SIZE);
1870 				rc = -EINVAL;
1871 				goto free_gaudi_device;
1872 			}
1873 
1874 			gaudi->events[event_id++] =
1875 					gaudi_irq_map_table[i].fc_id;
1876 		}
1877 	}
1878 
1879 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1880 
1881 	hdev->asic_specific = gaudi;
1882 
1883 	/* Create DMA pool for small allocations */
1884 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1885 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1886 	if (!hdev->dma_pool) {
1887 		dev_err(hdev->dev, "failed to create DMA pool\n");
1888 		rc = -ENOMEM;
1889 		goto free_gaudi_device;
1890 	}
1891 
1892 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1893 	if (rc)
1894 		goto free_dma_pool;
1895 
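	/*
	 * Pool for CPU accessible allocations: 32-byte minimum allocation
	 * granularity (order ilog2(32)), no NUMA node restriction (-1)
	 */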
1896 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1897 	if (!hdev->cpu_accessible_dma_pool) {
1898 		dev_err(hdev->dev,
1899 			"Failed to create CPU accessible DMA pool\n");
1900 		rc = -ENOMEM;
1901 		goto free_cpu_dma_mem;
1902 	}
1903 
1904 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1905 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1906 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1907 	if (rc) {
1908 		dev_err(hdev->dev,
1909 			"Failed to add memory to CPU accessible DMA pool\n");
1910 		rc = -EFAULT;
1911 		goto free_cpu_accessible_dma_pool;
1912 	}
1913 
1914 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1915 	if (rc)
1916 		goto free_cpu_accessible_dma_pool;
1917 
1918 	spin_lock_init(&gaudi->hw_queues_lock);
1919 
1920 	hdev->supports_sync_stream = true;
1921 	hdev->supports_coresight = true;
1922 	hdev->supports_staged_submission = true;
1923 	hdev->supports_wait_for_multi_cs = true;
1924 
1925 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1926 	hdev->stream_master_qid_arr =
1927 				hdev->asic_funcs->get_stream_master_qid_arr();
1928 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1929 
1930 	return 0;
1931 
1932 free_cpu_accessible_dma_pool:
1933 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1934 free_cpu_dma_mem:
1935 	if (!hdev->asic_prop.fw_security_enabled)
1936 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1937 					hdev->cpu_pci_msb_addr);
1938 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1939 					hdev->cpu_accessible_dma_address);
1940 free_dma_pool:
1941 	dma_pool_destroy(hdev->dma_pool);
1942 free_gaudi_device:
1943 	kfree(gaudi);
1944 	return rc;
1945 }
1946 
1947 static int gaudi_sw_fini(struct hl_device *hdev)
1948 {
1949 	struct gaudi_device *gaudi = hdev->asic_specific;
1950 
1951 	gaudi_free_internal_qmans_pq_mem(hdev);
1952 
1953 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1954 
1955 	if (!hdev->asic_prop.fw_security_enabled)
1956 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1957 					hdev->cpu_pci_msb_addr);
1958 
1959 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1960 					hdev->cpu_accessible_dma_address);
1961 
1962 	dma_pool_destroy(hdev->dma_pool);
1963 
1964 	kfree(gaudi);
1965 
1966 	return 0;
1967 }
1968 
1969 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1970 {
1971 	struct hl_device *hdev = arg;
1972 	int i;
1973 
1974 	if (hdev->disabled)
1975 		return IRQ_HANDLED;
1976 
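	/* A single MSI vector serves everything here, so poll every CQ and the EQ */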
1977 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1978 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1979 
1980 	hl_irq_handler_eq(irq, &hdev->event_queue);
1981 
1982 	return IRQ_HANDLED;
1983 }
1984 
1985 /*
1986  * For backward compatibility, new MSI interrupts should be set after the
1987  * existing CPU and NIC interrupts.
1988  */
1989 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1990 				bool cpu_eq)
1991 {
1992 	int msi_vec;
1993 
1994 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1995 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1996 				GAUDI_EVENT_QUEUE_MSI_IDX);
1997 
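	/*
	 * Vectors below the CPU EQ index (and the CPU EQ itself) map 1:1;
	 * later vectors are placed after the CPU EQ and the NIC vectors,
	 * hence the additional offset.
	 */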
1998 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1999 			(nr + NIC_NUMBER_OF_ENGINES + 1);
2000 
2001 	return pci_irq_vector(hdev->pdev, msi_vec);
2002 }
2003 
2004 static int gaudi_enable_msi_single(struct hl_device *hdev)
2005 {
2006 	int rc, irq;
2007 
2008 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2009 
2010 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2011 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2012 			"gaudi single msi", hdev);
2013 	if (rc)
2014 		dev_err(hdev->dev,
2015 			"Failed to request single MSI IRQ\n");
2016 
2017 	return rc;
2018 }
2019 
2020 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2021 {
2022 	int cq_cnt = hdev->asic_prop.completion_queues_count;
2023 	int rc, i, irq_cnt_init, irq;
2024 
2025 	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2026 		irq = gaudi_pci_irq_vector(hdev, i, false);
2027 		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2028 				&hdev->completion_queue[i]);
2029 		if (rc) {
2030 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2031 			goto free_irqs;
2032 		}
2033 	}
2034 
2035 	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2036 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2037 				&hdev->event_queue);
2038 	if (rc) {
2039 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2040 		goto free_irqs;
2041 	}
2042 
2043 	return 0;
2044 
2045 free_irqs:
2046 	for (i = 0 ; i < irq_cnt_init ; i++)
2047 		free_irq(gaudi_pci_irq_vector(hdev, i, false),
2048 				&hdev->completion_queue[i]);
2049 	return rc;
2050 }
2051 
2052 static int gaudi_enable_msi(struct hl_device *hdev)
2053 {
2054 	struct gaudi_device *gaudi = hdev->asic_specific;
2055 	int rc;
2056 
2057 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2058 		return 0;
2059 
2060 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2061 	if (rc < 0) {
2062 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2063 		return rc;
2064 	}
2065 
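	/* rc holds the number of MSI vectors that were actually allocated */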
2066 	if (rc < NUMBER_OF_INTERRUPTS) {
2067 		gaudi->multi_msi_mode = false;
2068 		rc = gaudi_enable_msi_single(hdev);
2069 	} else {
2070 		gaudi->multi_msi_mode = true;
2071 		rc = gaudi_enable_msi_multi(hdev);
2072 	}
2073 
2074 	if (rc)
2075 		goto free_pci_irq_vectors;
2076 
2077 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2078 
2079 	return 0;
2080 
2081 free_pci_irq_vectors:
2082 	pci_free_irq_vectors(hdev->pdev);
2083 	return rc;
2084 }
2085 
2086 static void gaudi_sync_irqs(struct hl_device *hdev)
2087 {
2088 	struct gaudi_device *gaudi = hdev->asic_specific;
2089 	int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2090 
2091 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2092 		return;
2093 
	/* Wait for all pending IRQ handlers to finish */
2095 	if (gaudi->multi_msi_mode) {
2096 		for (i = 0 ; i < cq_cnt ; i++)
2097 			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2098 
2099 		synchronize_irq(gaudi_pci_irq_vector(hdev,
2100 						GAUDI_EVENT_QUEUE_MSI_IDX,
2101 						true));
2102 	} else {
2103 		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2104 	}
2105 }
2106 
2107 static void gaudi_disable_msi(struct hl_device *hdev)
2108 {
2109 	struct gaudi_device *gaudi = hdev->asic_specific;
2110 	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2111 
2112 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2113 		return;
2114 
2115 	gaudi_sync_irqs(hdev);
2116 
2117 	if (gaudi->multi_msi_mode) {
2118 		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2119 						true);
2120 		free_irq(irq, &hdev->event_queue);
2121 
2122 		for (i = 0 ; i < cq_cnt ; i++) {
2123 			irq = gaudi_pci_irq_vector(hdev, i, false);
2124 			free_irq(irq, &hdev->completion_queue[i]);
2125 		}
2126 	} else {
2127 		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2128 	}
2129 
2130 	pci_free_irq_vectors(hdev->pdev);
2131 
2132 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2133 }
2134 
2135 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2136 {
2137 	struct gaudi_device *gaudi = hdev->asic_specific;
2138 
2139 	if (hdev->asic_prop.fw_security_enabled)
2140 		return;
2141 
2142 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2143 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2144 		return;
2145 
2146 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2147 		return;
2148 
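	/* Enable the SRAM scrambler in all NIF/SIF routers and DMA IF down channels */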
2149 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2150 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2152 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2154 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2155 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2156 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2157 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2158 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2159 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2160 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2161 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2162 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2163 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2164 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2165 
2166 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2167 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2168 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2169 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2170 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2171 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2172 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2173 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2174 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2175 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2176 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2177 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2178 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2179 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2180 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2181 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2182 
2183 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2184 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2185 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2186 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2187 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2188 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2189 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2190 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2191 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2192 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2193 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2194 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2195 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2196 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2197 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2198 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2199 
2200 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2201 }
2202 
2203 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2204 {
2205 	struct gaudi_device *gaudi = hdev->asic_specific;
2206 
2207 	if (hdev->asic_prop.fw_security_enabled)
2208 		return;
2209 
2210 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2211 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2212 		return;
2213 
2214 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2215 		return;
2216 
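	/* Enable the HBM scrambler in all NIF/SIF routers and DMA IF down channels */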
2217 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2218 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2219 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2220 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2221 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2222 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2223 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2224 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2225 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2226 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2227 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2228 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2229 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2230 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2231 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2232 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2233 
2234 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2235 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2236 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2237 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2238 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2239 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2240 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2241 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2242 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2243 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2244 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2245 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2246 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2247 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2248 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2249 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2250 
2251 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2252 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2253 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2254 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2255 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2256 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2257 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2258 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2259 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2260 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2261 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2262 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2263 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2264 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2265 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2266 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2267 
2268 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2269 }
2270 
2271 static void gaudi_init_e2e(struct hl_device *hdev)
2272 {
2273 	if (hdev->asic_prop.fw_security_enabled)
2274 		return;
2275 
2276 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2277 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2278 		return;
2279 
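	/* E2E HBM and PCI read/write sizes per router and DMA IF down channel;
	 * HBM values are scaled down by 8 (>> 3)
	 */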
2280 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2281 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2282 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2283 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2284 
2285 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2286 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2287 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2288 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2289 
2290 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2291 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2292 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2293 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2294 
2295 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2296 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2297 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2298 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2299 
2300 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2301 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2302 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2303 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2304 
2305 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2306 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2307 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2308 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2309 
2310 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2311 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2312 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2313 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2314 
2315 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2316 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2317 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2318 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2319 
2320 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2321 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2322 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2323 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2324 
2325 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2326 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2327 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2328 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2329 
2330 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2331 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2332 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2333 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2334 
2335 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2336 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2337 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2338 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2339 
2340 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2341 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2342 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2343 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2344 
2345 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2346 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2347 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2348 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2349 
2350 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2351 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2352 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2353 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2354 
2355 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2356 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2357 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2358 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2359 
2360 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2361 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2362 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2363 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2364 
2365 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2366 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2367 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2368 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2369 
2370 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2371 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2372 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2373 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2374 
2375 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2376 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2377 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2378 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2379 
2380 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2381 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2382 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2383 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2384 
2385 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2386 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2387 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2388 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2389 
2390 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2391 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2392 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2393 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2394 
2395 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2396 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2397 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2398 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2399 
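	/* With the sizes programmed, enable E2E credits on all routers and DMA IF down channels */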
2400 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2401 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2402 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2403 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2404 
2405 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2406 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2407 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2408 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2409 
2410 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2411 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2412 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2413 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2414 
2415 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2416 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2417 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2418 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2419 
2420 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2421 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2422 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2423 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2424 
2425 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2426 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2427 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2428 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2429 
2430 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2431 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2432 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2433 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2434 
2435 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2436 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2437 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2438 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2439 
2440 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2441 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2442 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2443 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2444 
2445 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2446 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2447 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2448 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2449 
2450 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2451 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2452 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2453 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2454 
2455 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2456 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2457 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2458 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2459 
2460 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2461 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2462 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2463 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2464 
2465 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2466 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2467 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2468 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2469 
2470 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2471 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2472 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2473 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2474 
2475 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2476 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2477 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2478 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2479 
2480 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2481 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2482 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2483 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2484 
2485 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2486 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2487 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2488 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2489 
2490 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2491 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2492 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2493 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2494 
2495 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2496 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2497 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2498 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2499 
2500 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2501 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2502 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2503 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2504 
2505 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2506 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2507 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2508 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2509 
2510 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2511 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2512 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2513 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2514 
2515 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2516 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2517 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2518 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2519 }
2520 
2521 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2522 {
2523 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2524 
2525 	if (hdev->asic_prop.fw_security_enabled)
2526 		return;
2527 
2528 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2529 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2530 		return;
2531 
2532 	hbm0_wr = 0x33333333;
2533 	hbm0_rd = 0x77777777;
2534 	hbm1_wr = 0x55555555;
2535 	hbm1_rd = 0xDDDDDDDD;
2536 
2537 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2538 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2539 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2540 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2541 
2542 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2543 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2544 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2545 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2546 
2547 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2548 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2549 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2550 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2551 
2552 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2553 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2554 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2555 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2556 
2557 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2558 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2559 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2560 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2561 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2562 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2563 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2564 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2565 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2566 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2567 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2568 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2569 
2570 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2571 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2572 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2573 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2574 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2575 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2576 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2577 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2578 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2579 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2580 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2581 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2582 }
2583 
2584 static void gaudi_init_golden_registers(struct hl_device *hdev)
2585 {
2586 	u32 tpc_offset;
2587 	int tpc_id, i;
2588 
2589 	gaudi_init_e2e(hdev);
2590 	gaudi_init_hbm_cred(hdev);
2591 
2592 	for (tpc_id = 0, tpc_offset = 0;
2593 				tpc_id < TPC_NUMBER_OF_ENGINES;
2594 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2595 		/* Mask all arithmetic interrupts from TPC */
2596 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2597 		/* Set 16 cache lines */
2598 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2599 				ICACHE_FETCH_LINE_NUM, 2);
2600 	}
2601 
	/* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2603 	for (i = 0 ; i < 128 ; i += 8)
2604 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2605 
2606 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2607 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2608 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2609 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2610 }
2611 
2612 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2613 					int qman_id, dma_addr_t qman_pq_addr)
2614 {
2615 	struct cpu_dyn_regs *dyn_regs =
2616 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2617 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2618 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2619 	u32 q_off, dma_qm_offset;
2620 	u32 dma_qm_err_cfg, irq_handler_offset;
2621 
2622 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2623 
2624 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2625 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2626 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2627 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2628 	so_base_en_lo = lower_32_bits(CFG_BASE +
2629 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2630 	so_base_en_hi = upper_32_bits(CFG_BASE +
2631 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2632 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2633 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2634 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2635 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2636 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2637 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2638 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2639 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2640 
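	/* Per-stream registers are spaced 4 bytes apart within the QMAN block */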
2641 	q_off = dma_qm_offset + qman_id * 4;
2642 
2643 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2644 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2645 
2646 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2647 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2648 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2649 
2650 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2651 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2652 							QMAN_LDMA_SRC_OFFSET);
2653 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2654 							QMAN_LDMA_DST_OFFSET);
2655 
2656 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2657 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2658 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2659 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2660 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2661 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2662 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2663 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2664 
2665 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2666 
2667 	/* The following configuration is needed only once per QMAN */
2668 	if (qman_id == 0) {
2669 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2670 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2671 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2672 
2673 		/* Configure RAZWI IRQ */
2674 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2675 		if (hdev->stop_on_err)
2676 			dma_qm_err_cfg |=
2677 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2678 
2679 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2680 
2681 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2682 			lower_32_bits(CFG_BASE + irq_handler_offset));
2683 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2684 			upper_32_bits(CFG_BASE + irq_handler_offset));
2685 
2686 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2687 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2688 									dma_id);
2689 
2690 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2691 				QM_ARB_ERR_MSG_EN_MASK);
2692 
2693 		/* Set timeout to maximum */
2694 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2695 
2696 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2697 				QMAN_EXTERNAL_MAKE_TRUSTED);
2698 
2699 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2700 	}
2701 }
2702 
2703 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2704 {
2705 	struct cpu_dyn_regs *dyn_regs =
2706 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2707 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2708 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2709 	u32 irq_handler_offset;
2710 
2711 	/* Set to maximum possible according to physical size */
2712 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2713 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2714 
2715 	/* WA for H/W bug H3-2116 */
2716 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2717 
	/* The STOP_ON bit implies no completion of the operation in case of RAZWI */
2719 	if (hdev->stop_on_err)
2720 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2721 
2722 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2723 
2724 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2725 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2726 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2727 
2728 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2729 		lower_32_bits(CFG_BASE + irq_handler_offset));
2730 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2731 		upper_32_bits(CFG_BASE + irq_handler_offset));
2732 
2733 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2734 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2735 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2736 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2737 	/* If the channel is secured, it should be in MMU bypass mode */
2738 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2739 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2740 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2741 }
2742 
2743 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2744 				u32 enable_mask)
2745 {
2746 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2747 
2748 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2749 }
2750 
2751 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2752 {
2753 	struct gaudi_device *gaudi = hdev->asic_specific;
2754 	struct hl_hw_queue *q;
2755 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2756 
2757 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2758 		return;
2759 
2760 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2761 		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q, we need to add 1 to get the
		 * correct queue index. In addition, we need to account for the
		 * CPU EQ and NIC IRQs in order to get the correct MSI register.
		 */
2767 		if (dma_id > 1) {
2768 			cpu_skip = 1;
2769 			nic_skip = NIC_NUMBER_OF_ENGINES;
2770 		} else {
2771 			cpu_skip = 0;
2772 			nic_skip = 0;
2773 		}
2774 
2775 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2776 			q_idx = 4 * dma_id + j + cpu_skip;
2777 			q = &hdev->kernel_queues[q_idx];
2778 			q->cq_id = cq_id++;
2779 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2780 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2781 						q->bus_address);
2782 		}
2783 
2784 		gaudi_init_dma_core(hdev, dma_id);
2785 
2786 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2787 	}
2788 
2789 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2790 }
2791 
2792 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2793 					int qman_id, u64 qman_base_addr)
2794 {
2795 	struct cpu_dyn_regs *dyn_regs =
2796 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2797 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2798 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2799 	u32 dma_qm_err_cfg, irq_handler_offset;
2800 	u32 q_off, dma_qm_offset;
2801 
2802 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2803 
2804 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2805 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2806 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2807 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2808 	so_base_en_lo = lower_32_bits(CFG_BASE +
2809 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2810 	so_base_en_hi = upper_32_bits(CFG_BASE +
2811 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2812 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2813 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2814 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2815 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2816 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2817 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2818 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2819 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2820 
2821 	q_off = dma_qm_offset + qman_id * 4;
2822 
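	/*
	 * Streams 0-3 are the upper CPs and get a PQ; qman_id 4 is the lower
	 * CP, which has no PQ and receives the error and arbitration
	 * configuration instead.
	 */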
2823 	if (qman_id < 4) {
2824 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2825 					lower_32_bits(qman_base_addr));
2826 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2827 					upper_32_bits(qman_base_addr));
2828 
2829 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2830 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2831 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2832 
2833 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2834 							QMAN_CPDMA_SIZE_OFFSET);
2835 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2836 							QMAN_CPDMA_SRC_OFFSET);
2837 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2838 							QMAN_CPDMA_DST_OFFSET);
2839 	} else {
2840 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2841 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2842 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2843 
2844 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2845 							QMAN_LDMA_SIZE_OFFSET);
2846 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2847 							QMAN_LDMA_SRC_OFFSET);
2848 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2849 							QMAN_LDMA_DST_OFFSET);
2850 
2851 		/* Configure RAZWI IRQ */
2852 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2853 		if (hdev->stop_on_err)
2854 			dma_qm_err_cfg |=
2855 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2856 
2857 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2858 
2859 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2860 			lower_32_bits(CFG_BASE + irq_handler_offset));
2861 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2862 			upper_32_bits(CFG_BASE + irq_handler_offset));
2863 
2864 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2865 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2866 									dma_id);
2867 
2868 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2869 				QM_ARB_ERR_MSG_EN_MASK);
2870 
2871 		/* Set timeout to maximum */
2872 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2873 
2874 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2875 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2876 				QMAN_INTERNAL_MAKE_TRUSTED);
2877 	}
2878 
2879 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2880 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2881 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2882 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2883 
2884 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2885 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2886 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2887 				mtr_base_ws_lo);
2888 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2889 				mtr_base_ws_hi);
2890 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2891 				so_base_ws_lo);
2892 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2893 				so_base_ws_hi);
2894 	}
2895 }
2896 
2897 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2898 {
2899 	struct gaudi_device *gaudi = hdev->asic_specific;
2900 	struct gaudi_internal_qman_info *q;
2901 	u64 qman_base_addr;
2902 	int i, j, dma_id, internal_q_index;
2903 
2904 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2905 		return;
2906 
2907 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2908 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2909 
2910 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			/*
			 * Add 1 for the CPU queue in order to get the correct
			 * queue index, as all internal queues are placed after it
			 */
2915 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2916 
2917 			q = &gaudi->internal_qmans[internal_q_index];
2918 			qman_base_addr = (u64) q->pq_dma_addr;
2919 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2920 						qman_base_addr);
2921 		}
2922 
2923 		/* Initializing lower CP for HBM DMA QMAN */
2924 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2925 
2926 		gaudi_init_dma_core(hdev, dma_id);
2927 
2928 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2929 	}
2930 
2931 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2932 }
2933 
2934 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2935 					int qman_id, u64 qman_base_addr)
2936 {
2937 	struct cpu_dyn_regs *dyn_regs =
2938 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2939 	u32 mtr_base_lo, mtr_base_hi;
2940 	u32 so_base_lo, so_base_hi;
2941 	u32 irq_handler_offset;
2942 	u32 q_off, mme_id;
2943 	u32 mme_qm_err_cfg;
2944 
2945 	mtr_base_lo = lower_32_bits(CFG_BASE +
2946 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2947 	mtr_base_hi = upper_32_bits(CFG_BASE +
2948 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2949 	so_base_lo = lower_32_bits(CFG_BASE +
2950 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2951 	so_base_hi = upper_32_bits(CFG_BASE +
2952 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2953 
2954 	q_off = mme_offset + qman_id * 4;
2955 
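	/* Streams 0-3 are the upper CPs with a PQ; qman_id 4 is the lower CP */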
2956 	if (qman_id < 4) {
2957 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2958 					lower_32_bits(qman_base_addr));
2959 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2960 					upper_32_bits(qman_base_addr));
2961 
2962 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2963 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2964 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2965 
2966 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2967 							QMAN_CPDMA_SIZE_OFFSET);
2968 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2969 							QMAN_CPDMA_SRC_OFFSET);
2970 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2971 							QMAN_CPDMA_DST_OFFSET);
2972 	} else {
2973 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2974 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2975 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2976 
2977 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2978 							QMAN_LDMA_SIZE_OFFSET);
2979 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2980 							QMAN_LDMA_SRC_OFFSET);
2981 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2982 							QMAN_LDMA_DST_OFFSET);
2983 
2984 		/* Configure RAZWI IRQ */
2985 		mme_id = mme_offset /
2986 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2987 
2988 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2989 		if (hdev->stop_on_err)
2990 			mme_qm_err_cfg |=
2991 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2992 
2993 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2994 
2995 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2996 			lower_32_bits(CFG_BASE + irq_handler_offset));
2997 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2998 			upper_32_bits(CFG_BASE + irq_handler_offset));
2999 
3000 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3001 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3002 									mme_id);
3003 
3004 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3005 				QM_ARB_ERR_MSG_EN_MASK);
3006 
3007 		/* Set timeout to maximum */
3008 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
3009 
3010 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3011 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3012 				QMAN_INTERNAL_MAKE_TRUSTED);
3013 	}
3014 
3015 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3016 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3017 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3018 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3019 }
3020 
3021 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3022 {
3023 	struct gaudi_device *gaudi = hdev->asic_specific;
3024 	struct gaudi_internal_qman_info *q;
3025 	u64 qman_base_addr;
3026 	u32 mme_offset;
3027 	int i, internal_q_index;
3028 
3029 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
3030 		return;
3031 
3032 	/*
3033 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3034 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3035 	 */
3036 
3037 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3038 
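	/* Start with the N_W MME (MME2) and switch to the S_W MME (MME0) after its four streams */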
3039 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3040 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3041 		q = &gaudi->internal_qmans[internal_q_index];
3042 		qman_base_addr = (u64) q->pq_dma_addr;
3043 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3044 					qman_base_addr);
3045 		if (i == 3)
3046 			mme_offset = 0;
3047 	}
3048 
3049 	/* Initializing lower CP for MME QMANs */
3050 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3051 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3052 	gaudi_init_mme_qman(hdev, 0, 4, 0);
3053 
3054 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3055 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3056 
3057 	gaudi->hw_cap_initialized |= HW_CAP_MME;
3058 }
3059 
3060 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3061 				int qman_id, u64 qman_base_addr)
3062 {
3063 	struct cpu_dyn_regs *dyn_regs =
3064 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3065 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3066 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3067 	u32 tpc_qm_err_cfg, irq_handler_offset;
3068 	u32 q_off, tpc_id;
3069 
3070 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3071 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3072 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3073 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3074 	so_base_en_lo = lower_32_bits(CFG_BASE +
3075 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3076 	so_base_en_hi = upper_32_bits(CFG_BASE +
3077 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3078 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3079 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3080 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3081 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3082 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3083 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3084 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3085 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3086 
3087 	q_off = tpc_offset + qman_id * 4;
3088 
3089 	tpc_id = tpc_offset /
3090 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3091 
3092 	if (qman_id < 4) {
3093 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3094 					lower_32_bits(qman_base_addr));
3095 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3096 					upper_32_bits(qman_base_addr));
3097 
3098 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3099 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3100 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3101 
3102 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3103 							QMAN_CPDMA_SIZE_OFFSET);
3104 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3105 							QMAN_CPDMA_SRC_OFFSET);
3106 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3107 							QMAN_CPDMA_DST_OFFSET);
3108 	} else {
3109 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3110 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3111 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3112 
3113 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3114 							QMAN_LDMA_SIZE_OFFSET);
3115 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3116 							QMAN_LDMA_SRC_OFFSET);
3117 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3118 							QMAN_LDMA_DST_OFFSET);
3119 
3120 		/* Configure RAZWI IRQ */
3121 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3122 		if (hdev->stop_on_err)
3123 			tpc_qm_err_cfg |=
3124 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3125 
3126 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3127 
3128 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3129 			lower_32_bits(CFG_BASE + irq_handler_offset));
3130 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3131 			upper_32_bits(CFG_BASE + irq_handler_offset));
3132 
3133 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3134 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3135 									tpc_id);
3136 
3137 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3138 				QM_ARB_ERR_MSG_EN_MASK);
3139 
3140 		/* Set timeout to maximum */
3141 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3142 
3143 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3144 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3145 				QMAN_INTERNAL_MAKE_TRUSTED);
3146 	}
3147 
3148 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3149 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3150 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3151 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3152 
3153 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3154 	if (tpc_id == 6) {
3155 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3156 				mtr_base_ws_lo);
3157 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3158 				mtr_base_ws_hi);
3159 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3160 				so_base_ws_lo);
3161 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3162 				so_base_ws_hi);
3163 	}
3164 }
3165 
3166 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3167 {
3168 	struct gaudi_device *gaudi = hdev->asic_specific;
3169 	struct gaudi_internal_qman_info *q;
3170 	u64 qman_base_addr;
3171 	u32 so_base_hi, tpc_offset = 0;
3172 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3173 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3174 	int i, tpc_id, internal_q_index;
3175 
3176 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3177 		return;
3178 
3179 	so_base_hi = upper_32_bits(CFG_BASE +
3180 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3181 
3182 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3183 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3184 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3185 						tpc_id * QMAN_STREAMS + i;
3186 			q = &gaudi->internal_qmans[internal_q_index];
3187 			qman_base_addr = (u64) q->pq_dma_addr;
3188 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3189 						qman_base_addr);
3190 
3191 			if (i == 3) {
3192 				/* Initializing lower CP for TPC QMAN */
3193 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3194 
3195 				/* Enable the QMAN and TPC channel */
3196 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3197 						QMAN_TPC_ENABLE);
3198 			}
3199 		}
3200 
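		/* Set the high part of the TPC's SM base address to the E_N SOBs */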
3201 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3202 				so_base_hi);
3203 
3204 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3205 
3206 		gaudi->hw_cap_initialized |=
3207 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3208 	}
3209 }
3210 
3211 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3212 				int qman_id, u64 qman_base_addr, int nic_id)
3213 {
3214 	struct cpu_dyn_regs *dyn_regs =
3215 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3216 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3217 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3218 	u32 nic_qm_err_cfg, irq_handler_offset;
3219 	u32 q_off;
3220 
3221 	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3222 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3223 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3224 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3225 	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3226 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3227 	so_base_en_hi = upper_32_bits(CFG_BASE +
3228 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3229 	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3230 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3231 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3232 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3233 	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3234 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3235 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3236 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3237 
3238 	q_off = nic_offset + qman_id * 4;
3239 
3240 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3241 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3242 
3243 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3244 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3245 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3246 
3247 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3248 							QMAN_LDMA_SIZE_OFFSET);
3249 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3250 							QMAN_LDMA_SRC_OFFSET);
3251 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3252 							QMAN_LDMA_DST_OFFSET);
3253 
3254 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3255 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3256 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3257 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3258 
3259 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3260 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3261 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3262 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3263 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3264 
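	/* The registers below are per-QMAN rather than per-stream, so they are
	 * configured only once, when stream 0 is initialized.
	 */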
3265 	if (qman_id == 0) {
3266 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3267 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3268 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3269 
3270 		/* Configure RAZWI IRQ */
3271 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3272 		if (hdev->stop_on_err)
3273 			nic_qm_err_cfg |=
3274 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3275 
3276 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3277 
3278 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3279 			lower_32_bits(CFG_BASE + irq_handler_offset));
3280 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3281 			upper_32_bits(CFG_BASE + irq_handler_offset));
3282 
3283 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3284 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3285 									nic_id);
3286 
3287 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3288 				QM_ARB_ERR_MSG_EN_MASK);
3289 
3290 		/* Set timeout to maximum */
3291 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3292 
3293 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3294 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3295 				QMAN_INTERNAL_MAKE_TRUSTED);
3296 	}
3297 }
3298 
3299 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3300 {
3301 	struct gaudi_device *gaudi = hdev->asic_specific;
3302 	struct gaudi_internal_qman_info *q;
3303 	u64 qman_base_addr;
3304 	u32 nic_offset = 0;
3305 	u32 nic_delta_between_qmans =
3306 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3307 	u32 nic_delta_between_nics =
3308 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3309 	int i, nic_id, internal_q_index;
3310 
3311 	if (!hdev->nic_ports_mask)
3312 		return;
3313 
3314 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3315 		return;
3316 
3317 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3318 
3319 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3320 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3321 			nic_offset += nic_delta_between_qmans;
3322 			if (nic_id & 1) {
3323 				nic_offset -= (nic_delta_between_qmans * 2);
3324 				nic_offset += nic_delta_between_nics;
3325 			}
3326 			continue;
3327 		}
3328 
3329 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3330 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3331 						nic_id * QMAN_STREAMS + i;
3332 			q = &gaudi->internal_qmans[internal_q_index];
3333 			qman_base_addr = (u64) q->pq_dma_addr;
3334 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3335 						qman_base_addr, nic_id);
3336 		}
3337 
3338 		/* Enable the QMAN */
3339 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3340 
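		/* Advance to the next QMAN within this NIC macro; after the odd
		 * (second) engine, jump to the base of the next NIC macro.
		 */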
3341 		nic_offset += nic_delta_between_qmans;
3342 		if (nic_id & 1) {
3343 			nic_offset -= (nic_delta_between_qmans * 2);
3344 			nic_offset += nic_delta_between_nics;
3345 		}
3346 
3347 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3348 	}
3349 }
3350 
3351 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3352 {
3353 	struct gaudi_device *gaudi = hdev->asic_specific;
3354 
3355 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3356 		return;
3357 
3358 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3359 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3360 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3361 }
3362 
3363 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3364 {
3365 	struct gaudi_device *gaudi = hdev->asic_specific;
3366 
3367 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3368 		return;
3369 
3370 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3371 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3372 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3373 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3374 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3375 }
3376 
3377 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3378 {
3379 	struct gaudi_device *gaudi = hdev->asic_specific;
3380 
3381 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3382 		return;
3383 
3384 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3385 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3386 }
3387 
3388 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3389 {
3390 	struct gaudi_device *gaudi = hdev->asic_specific;
3391 	u32 tpc_offset = 0;
3392 	int tpc_id;
3393 
3394 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3395 		return;
3396 
3397 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3398 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3399 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3400 	}
3401 }
3402 
3403 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3404 {
3405 	struct gaudi_device *gaudi = hdev->asic_specific;
3406 	u32 nic_mask, nic_offset = 0;
3407 	u32 nic_delta_between_qmans =
3408 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3409 	u32 nic_delta_between_nics =
3410 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3411 	int nic_id;
3412 
3413 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3414 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3415 
3416 		if (gaudi->hw_cap_initialized & nic_mask)
3417 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3418 
3419 		nic_offset += nic_delta_between_qmans;
3420 		if (nic_id & 1) {
3421 			nic_offset -= (nic_delta_between_qmans * 2);
3422 			nic_offset += nic_delta_between_nics;
3423 		}
3424 	}
3425 }
3426 
3427 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3428 {
3429 	struct gaudi_device *gaudi = hdev->asic_specific;
3430 
3431 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3432 		return;
3433 
3434 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
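	/* 0xF stops only the 4 upper CPs; the lower CP is left running */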
3435 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3436 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3437 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3438 }
3439 
3440 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3441 {
3442 	struct gaudi_device *gaudi = hdev->asic_specific;
3443 
3444 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3445 		return;
3446 
3447 	/* Stop CPs of HBM DMA QMANs */
3448 
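	/* 0x1F stops all five CPs: the four upper CPs and the lower CP */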
3449 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3450 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3451 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3452 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3453 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3454 }
3455 
3456 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3457 {
3458 	struct gaudi_device *gaudi = hdev->asic_specific;
3459 
3460 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3461 		return;
3462 
3463 	/* Stop CPs of MME QMANs */
3464 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3465 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3466 }
3467 
3468 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3469 {
3470 	struct gaudi_device *gaudi = hdev->asic_specific;
3471 
3472 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3473 		return;
3474 
3475 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3476 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3477 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3478 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3479 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3480 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3481 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3482 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3483 }
3484 
3485 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3486 {
3487 	struct gaudi_device *gaudi = hdev->asic_specific;
3488 
3489 	/* Stop upper CPs of QMANs */
3490 
3491 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3492 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3493 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3494 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3495 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3496 
3497 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3498 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3499 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3500 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3501 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3502 
3503 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3504 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3505 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3506 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3507 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3508 
3509 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3510 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3511 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3512 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3513 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3514 
3515 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3516 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3517 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3518 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3519 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3520 
3521 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3522 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3523 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3524 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3525 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3526 
3527 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3528 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3529 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3530 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3531 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3532 
3533 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3534 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3535 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3536 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3537 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3538 
3539 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3540 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3541 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3542 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3543 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3544 
3545 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3546 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3547 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3548 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3549 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3550 }
3551 
3552 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3553 {
3554 	struct gaudi_device *gaudi = hdev->asic_specific;
3555 
3556 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3557 		return;
3558 
3559 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3560 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3561 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3562 }
3563 
3564 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3565 {
3566 	struct gaudi_device *gaudi = hdev->asic_specific;
3567 
3568 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3569 		return;
3570 
3571 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3572 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3573 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3574 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3575 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3576 }
3577 
3578 static void gaudi_mme_stall(struct hl_device *hdev)
3579 {
3580 	struct gaudi_device *gaudi = hdev->asic_specific;
3581 
3582 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3583 		return;
3584 
3585 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3586 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3587 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3588 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3589 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3590 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3591 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3592 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3593 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3594 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3595 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3596 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3597 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3598 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3599 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3600 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3601 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3602 }
3603 
3604 static void gaudi_tpc_stall(struct hl_device *hdev)
3605 {
3606 	struct gaudi_device *gaudi = hdev->asic_specific;
3607 
3608 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3609 		return;
3610 
3611 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3612 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3613 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3614 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3615 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3616 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3617 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3618 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3619 }
3620 
3621 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3622 {
3623 	u32 qman_offset;
3624 	int i;
3625 
3626 	if (hdev->asic_prop.fw_security_enabled)
3627 		return;
3628 
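	/* Writing 0 to the CGM_CFG registers disables clock gating in the DMA,
	 * MME and TPC QMANs.
	 */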
3629 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3630 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3631 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3632 
3633 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3634 	}
3635 
3636 	WREG32(mmMME0_QM_CGM_CFG, 0);
3637 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3638 	WREG32(mmMME2_QM_CGM_CFG, 0);
3639 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3640 
3641 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3642 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3643 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3644 
3645 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3646 	}
3647 }
3648 
3649 static void gaudi_enable_timestamp(struct hl_device *hdev)
3650 {
3651 	/* Disable the timestamp counter */
3652 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3653 
3654 	/* Zero the lower/upper parts of the 64-bit counter */
3655 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3656 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3657 
3658 	/* Enable the counter */
3659 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3660 }
3661 
3662 static void gaudi_disable_timestamp(struct hl_device *hdev)
3663 {
3664 	/* Disable the timestamp counter */
3665 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3666 }
3667 
3668 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3669 {
3670 	u32 wait_timeout_ms;
3671 
3672 	if (hdev->pldm)
3673 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3674 	else
3675 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3676 
3677 	if (fw_reset)
3678 		goto skip_engines;
3679 
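	/* Halt sequence: first stop the QMAN CPs, then stall the engine cores,
	 * and finally disable the QMANs altogether.
	 */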
3680 	gaudi_stop_nic_qmans(hdev);
3681 	gaudi_stop_mme_qmans(hdev);
3682 	gaudi_stop_tpc_qmans(hdev);
3683 	gaudi_stop_hbm_dma_qmans(hdev);
3684 	gaudi_stop_pci_dma_qmans(hdev);
3685 
3686 	msleep(wait_timeout_ms);
3687 
3688 	gaudi_pci_dma_stall(hdev);
3689 	gaudi_hbm_dma_stall(hdev);
3690 	gaudi_tpc_stall(hdev);
3691 	gaudi_mme_stall(hdev);
3692 
3693 	msleep(wait_timeout_ms);
3694 
3695 	gaudi_disable_nic_qmans(hdev);
3696 	gaudi_disable_mme_qmans(hdev);
3697 	gaudi_disable_tpc_qmans(hdev);
3698 	gaudi_disable_hbm_dma_qmans(hdev);
3699 	gaudi_disable_pci_dma_qmans(hdev);
3700 
3701 	gaudi_disable_timestamp(hdev);
3702 
3703 skip_engines:
3704 	gaudi_disable_msi(hdev);
3705 }
3706 
3707 static int gaudi_mmu_init(struct hl_device *hdev)
3708 {
3709 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3710 	struct gaudi_device *gaudi = hdev->asic_specific;
3711 	u64 hop0_addr;
3712 	int rc, i;
3713 
3714 	if (!hdev->mmu_enable)
3715 		return 0;
3716 
3717 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3718 		return 0;
3719 
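	/* Set the hop0 (top level page table) address for every ASID */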
3720 	for (i = 0 ; i < prop->max_asid ; i++) {
3721 		hop0_addr = prop->mmu_pgt_addr +
3722 				(i * prop->mmu_hop_table_size);
3723 
3724 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3725 		if (rc) {
3726 			dev_err(hdev->dev,
3727 				"failed to set hop0 addr for asid %d\n", i);
3728 			return rc;
3729 		}
3730 	}
3731 
3732 	/* init MMU cache manage page */
3733 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3734 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3735 
3736 	/* mem cache invalidation */
3737 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3738 
3739 	rc = hl_mmu_invalidate_cache(hdev, true, 0);
3740 	if (rc)
3741 		return rc;
3742 
3743 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3744 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3745 
3746 	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3747 
3748 	/*
3749 	 * The H/W expects the first PI after init to be 1. After wraparound
3750 	 * we'll write 0.
3751 	 */
3752 	gaudi->mmu_cache_inv_pi = 1;
3753 
3754 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3755 
3756 	return 0;
3757 }
3758 
3759 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3760 {
3761 	void __iomem *dst;
3762 
3763 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3764 
3765 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3766 }
3767 
3768 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3769 {
3770 	void __iomem *dst;
3771 
3772 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3773 
3774 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3775 }
3776 
3777 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3778 {
3779 	struct dynamic_fw_load_mgr *dynamic_loader;
3780 	struct cpu_dyn_regs *dyn_regs;
3781 
3782 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3783 
	/*
	 * Update the initial values of a few specific dynamic regs here, since
	 * before reading the first descriptor from the FW those values have to
	 * be hard-coded. In later stages of the protocol those values will be
	 * updated automatically by reading the FW descriptor, so the data there
	 * will always be up-to-date.
	 */
3791 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3792 	dyn_regs->kmd_msg_to_cpu =
3793 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3794 	dyn_regs->cpu_cmd_status_to_host =
3795 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3796 
3797 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3798 }
3799 
3800 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3801 {
3802 	struct static_fw_load_mgr *static_loader;
3803 
3804 	static_loader = &hdev->fw_loader.static_loader;
3805 
3806 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3807 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3808 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3809 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3810 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3811 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3812 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3813 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3814 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3815 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3816 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3817 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3818 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3819 			GAUDI_PLDM_RESET_WAIT_MSEC :
3820 			GAUDI_CPU_RESET_WAIT_MSEC;
3821 }
3822 
3823 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3824 {
3825 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3826 
3827 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3828 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3829 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3830 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3831 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3832 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3833 }
3834 
3835 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3836 {
3837 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3838 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3839 
3840 	/* fill common fields */
3841 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3842 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3843 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3844 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3845 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3846 	fw_loader->skip_bmc = !hdev->bmc_enable;
3847 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3848 	fw_loader->dram_bar_id = HBM_BAR_ID;
3849 
3850 	if (prop->dynamic_fw_load)
3851 		gaudi_init_dynamic_firmware_loader(hdev);
3852 	else
3853 		gaudi_init_static_firmware_loader(hdev);
3854 }
3855 
3856 static int gaudi_init_cpu(struct hl_device *hdev)
3857 {
3858 	struct gaudi_device *gaudi = hdev->asic_specific;
3859 	int rc;
3860 
3861 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3862 		return 0;
3863 
3864 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3865 		return 0;
3866 
	/*
	 * The device CPU works with 40-bit addresses.
	 * This register sets the extension to 50 bits.
	 */
3871 	if (!hdev->asic_prop.fw_security_enabled)
3872 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3873 
3874 	rc = hl_fw_init_cpu(hdev);
3875 
3876 	if (rc)
3877 		return rc;
3878 
3879 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3880 
3881 	return 0;
3882 }
3883 
3884 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3885 {
3886 	struct cpu_dyn_regs *dyn_regs =
3887 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3888 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3889 	struct gaudi_device *gaudi = hdev->asic_specific;
3890 	u32 status, irq_handler_offset;
3891 	struct hl_eq *eq;
3892 	struct hl_hw_queue *cpu_pq =
3893 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3894 	int err;
3895 
3896 	if (!hdev->cpu_queues_enable)
3897 		return 0;
3898 
3899 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3900 		return 0;
3901 
3902 	eq = &hdev->event_queue;
3903 
3904 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3905 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3906 
3907 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3908 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3909 
3910 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3911 			lower_32_bits(hdev->cpu_accessible_dma_address));
3912 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3913 			upper_32_bits(hdev->cpu_accessible_dma_address));
3914 
3915 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3916 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3917 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3918 
3919 	/* Used for EQ CI */
3920 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3921 
3922 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3923 
3924 	if (gaudi->multi_msi_mode)
3925 		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3926 	else
3927 		WREG32(mmCPU_IF_QUEUE_INIT,
3928 			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3929 
3930 	irq_handler_offset = prop->gic_interrupts_enable ?
3931 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3932 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3933 
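	/* Raise the PI_UPDATE interrupt so the device CPU will process the
	 * queue configuration written above.
	 */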
3934 	WREG32(irq_handler_offset,
3935 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3936 
3937 	err = hl_poll_timeout(
3938 		hdev,
3939 		mmCPU_IF_QUEUE_INIT,
3940 		status,
3941 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3942 		1000,
3943 		cpu_timeout);
3944 
3945 	if (err) {
3946 		dev_err(hdev->dev,
3947 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3948 		return -EIO;
3949 	}
3950 
3951 	/* update FW application security bits */
3952 	if (prop->fw_cpu_boot_dev_sts0_valid)
3953 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3954 	if (prop->fw_cpu_boot_dev_sts1_valid)
3955 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3956 
3957 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3958 	return 0;
3959 }
3960 
3961 static void gaudi_pre_hw_init(struct hl_device *hdev)
3962 {
3963 	/* Perform read from the device to make sure device is up */
3964 	RREG32(mmHW_STATE);
3965 
3966 	if (!hdev->asic_prop.fw_security_enabled) {
3967 		/* Set the access through PCI bars (Linux driver only) as
3968 		 * secured
3969 		 */
3970 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3971 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3972 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3973 
3974 		/* Perform read to flush the waiting writes to ensure
3975 		 * configuration was set in the device
3976 		 */
3977 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3978 	}
3979 
3980 	/*
3981 	 * Let's mark in the H/W that we have reached this point. We check
3982 	 * this value in the reset_before_init function to understand whether
3983 	 * we need to reset the chip before doing H/W init. This register is
3984 	 * cleared by the H/W upon H/W reset
3985 	 */
3986 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3987 }
3988 
3989 static int gaudi_hw_init(struct hl_device *hdev)
3990 {
3991 	struct gaudi_device *gaudi = hdev->asic_specific;
3992 	int rc;
3993 
3994 	gaudi_pre_hw_init(hdev);
3995 
3996 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3997 	 * So we set it here and if anyone tries to move it later to
3998 	 * a different address, there will be an error
3999 	 */
4000 	if (hdev->asic_prop.iatu_done_by_fw)
4001 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4002 
	/*
	 * Before pushing u-boot/linux to the device, we need to set the HBM
	 * BAR to the base address of DRAM.
	 */
4007 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4008 		dev_err(hdev->dev,
4009 			"failed to map HBM bar to DRAM base address\n");
4010 		return -EIO;
4011 	}
4012 
4013 	rc = gaudi_init_cpu(hdev);
4014 	if (rc) {
4015 		dev_err(hdev->dev, "failed to initialize CPU\n");
4016 		return rc;
4017 	}
4018 
4019 	/* In case the clock gating was enabled in preboot we need to disable
4020 	 * it here before touching the MME/TPC registers.
4021 	 */
4022 	gaudi_disable_clock_gating(hdev);
4023 
4024 	/* SRAM scrambler must be initialized after CPU is running from HBM */
4025 	gaudi_init_scrambler_sram(hdev);
4026 
4027 	/* This is here just in case we are working without CPU */
4028 	gaudi_init_scrambler_hbm(hdev);
4029 
4030 	gaudi_init_golden_registers(hdev);
4031 
4032 	rc = gaudi_mmu_init(hdev);
4033 	if (rc)
4034 		return rc;
4035 
4036 	gaudi_init_security(hdev);
4037 
4038 	gaudi_init_pci_dma_qmans(hdev);
4039 
4040 	gaudi_init_hbm_dma_qmans(hdev);
4041 
4042 	gaudi_init_mme_qmans(hdev);
4043 
4044 	gaudi_init_tpc_qmans(hdev);
4045 
4046 	gaudi_init_nic_qmans(hdev);
4047 
4048 	gaudi_enable_timestamp(hdev);
4049 
4050 	/* MSI must be enabled before CPU queues and NIC are initialized */
4051 	rc = gaudi_enable_msi(hdev);
4052 	if (rc)
4053 		goto disable_queues;
4054 
4055 	/* must be called after MSI was enabled */
4056 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4057 	if (rc) {
4058 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4059 			rc);
4060 		goto disable_msi;
4061 	}
4062 
4063 	/* Perform read from the device to flush all configuration */
4064 	RREG32(mmHW_STATE);
4065 
4066 	return 0;
4067 
4068 disable_msi:
4069 	gaudi_disable_msi(hdev);
4070 disable_queues:
4071 	gaudi_disable_mme_qmans(hdev);
4072 	gaudi_disable_pci_dma_qmans(hdev);
4073 
4074 	return rc;
4075 }
4076 
4077 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4078 {
4079 	struct cpu_dyn_regs *dyn_regs =
4080 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4081 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4082 	struct gaudi_device *gaudi = hdev->asic_specific;
4083 	bool driver_performs_reset;
4084 
4085 	if (!hard_reset) {
4086 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4087 		return 0;
4088 	}
4089 
4090 	if (hdev->pldm) {
4091 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4092 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4093 	} else {
4094 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4095 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4096 	}
4097 
4098 	if (fw_reset) {
4099 		dev_dbg(hdev->dev,
4100 			"Firmware performs HARD reset, going to wait %dms\n",
4101 			reset_timeout_ms);
4102 
4103 		goto skip_reset;
4104 	}
4105 
4106 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4107 					!hdev->asic_prop.hard_reset_done_by_fw);
4108 
4109 	/* Set device to handle FLR by H/W as we will put the device CPU to
4110 	 * halt mode
4111 	 */
4112 	if (driver_performs_reset)
4113 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4114 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4115 
	/* If Linux is loaded in the device CPU, we need to communicate with it
	 * via the GIC. Otherwise, we need to use COMMS, or the MSG_TO_CPU
	 * registers in case of old F/Ws.
	 */
4120 	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4121 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4122 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4123 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4124 
4125 		WREG32(irq_handler_offset,
4126 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4127 
4128 		/* This is a hail-mary attempt to revive the card in the small chance that the
4129 		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4130 		 * In that case, triggering reset through GIC won't help. We need to trigger the
4131 		 * reset as if Linux wasn't loaded.
4132 		 *
4133 		 * We do it only if the reset cause was HB, because that would be the indication
4134 		 * of such an event.
4135 		 *
4136 		 * In case watchdog hasn't expired but we still got HB, then this won't do any
4137 		 * damage.
4138 		 */
4139 		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4140 			if (hdev->asic_prop.hard_reset_done_by_fw)
4141 				hl_fw_ask_hard_reset_without_linux(hdev);
4142 			else
4143 				hl_fw_ask_halt_machine_without_linux(hdev);
4144 		}
4145 	} else {
4146 		if (hdev->asic_prop.hard_reset_done_by_fw)
4147 			hl_fw_ask_hard_reset_without_linux(hdev);
4148 		else
4149 			hl_fw_ask_halt_machine_without_linux(hdev);
4150 	}
4151 
4152 	if (driver_performs_reset) {
4153 
4154 		/* Configure the reset registers. Must be done as early as
4155 		 * possible in case we fail during H/W initialization
4156 		 */
4157 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4158 						(CFG_RST_H_DMA_MASK |
4159 						CFG_RST_H_MME_MASK |
4160 						CFG_RST_H_SM_MASK |
4161 						CFG_RST_H_TPC_7_MASK));
4162 
4163 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4164 
4165 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4166 						(CFG_RST_H_HBM_MASK |
4167 						CFG_RST_H_TPC_7_MASK |
4168 						CFG_RST_H_NIC_MASK |
4169 						CFG_RST_H_SM_MASK |
4170 						CFG_RST_H_DMA_MASK |
4171 						CFG_RST_H_MME_MASK |
4172 						CFG_RST_H_CPU_MASK |
4173 						CFG_RST_H_MMU_MASK));
4174 
4175 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4176 						(CFG_RST_L_IF_MASK |
4177 						CFG_RST_L_PSOC_MASK |
4178 						CFG_RST_L_TPC_MASK));
4179 
4180 		msleep(cpu_timeout_ms);
4181 
4182 		/* Tell ASIC not to re-initialize PCIe */
4183 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4184 
4185 		/* Restart BTL/BLR upon hard-reset */
4186 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4187 
4188 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4189 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4190 
4191 		dev_dbg(hdev->dev,
4192 			"Issued HARD reset command, going to wait %dms\n",
4193 			reset_timeout_ms);
4194 	} else {
4195 		dev_dbg(hdev->dev,
4196 			"Firmware performs HARD reset, going to wait %dms\n",
4197 			reset_timeout_ms);
4198 	}
4199 
4200 skip_reset:
	/*
	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
	 * itself is in reset. We need to wait until the reset is deasserted.
	 */
4205 	msleep(reset_timeout_ms);
4206 
4207 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4208 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4209 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4210 		return -ETIMEDOUT;
4211 	}
4212 
4213 	if (gaudi) {
4214 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4215 						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4216 						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4217 						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4218 						HW_CAP_HBM_SCRAMBLER);
4219 
4220 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4221 
4222 		hdev->device_cpu_is_halted = false;
4223 	}
4224 	return 0;
4225 }
4226 
4227 static int gaudi_suspend(struct hl_device *hdev)
4228 {
4229 	int rc;
4230 
4231 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4232 	if (rc)
4233 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4234 
4235 	return rc;
4236 }
4237 
4238 static int gaudi_resume(struct hl_device *hdev)
4239 {
4240 	return gaudi_init_iatu(hdev);
4241 }
4242 
4243 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4244 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4245 {
4246 	int rc;
4247 
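	/* Mark the mapping as I/O memory that must not be copied, expanded or
	 * dumped.
	 */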
4248 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4249 			VM_DONTCOPY | VM_NORESERVE);
4250 
4251 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4252 				(dma_addr - HOST_PHYS_BASE), size);
4253 	if (rc)
4254 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4255 
4256 	return rc;
4257 }
4258 
4259 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4260 {
4261 	struct cpu_dyn_regs *dyn_regs =
4262 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4263 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4264 	struct gaudi_device *gaudi = hdev->asic_specific;
4265 	bool invalid_queue = false;
4266 	int dma_id;
4267 
4268 	switch (hw_queue_id) {
4269 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4270 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4271 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4272 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4273 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4274 		break;
4275 
4276 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4277 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4278 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4279 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4280 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4281 		break;
4282 
4283 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4284 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4285 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4286 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4287 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4288 		break;
4289 
4290 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4291 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4292 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4293 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4294 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4295 		break;
4296 
4297 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4298 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4299 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4300 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4301 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4302 		break;
4303 
4304 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4305 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4306 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4307 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4308 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4309 		break;
4310 
4311 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4312 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4313 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4314 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4315 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4316 		break;
4317 
4318 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4319 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4320 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4321 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4322 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4323 		break;
4324 
4325 	case GAUDI_QUEUE_ID_CPU_PQ:
4326 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4327 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4328 		else
4329 			invalid_queue = true;
4330 		break;
4331 
4332 	case GAUDI_QUEUE_ID_MME_0_0:
4333 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4334 		break;
4335 
4336 	case GAUDI_QUEUE_ID_MME_0_1:
4337 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4338 		break;
4339 
4340 	case GAUDI_QUEUE_ID_MME_0_2:
4341 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4342 		break;
4343 
4344 	case GAUDI_QUEUE_ID_MME_0_3:
4345 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4346 		break;
4347 
4348 	case GAUDI_QUEUE_ID_MME_1_0:
4349 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4350 		break;
4351 
4352 	case GAUDI_QUEUE_ID_MME_1_1:
4353 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4354 		break;
4355 
4356 	case GAUDI_QUEUE_ID_MME_1_2:
4357 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4358 		break;
4359 
4360 	case GAUDI_QUEUE_ID_MME_1_3:
4361 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4362 		break;
4363 
4364 	case GAUDI_QUEUE_ID_TPC_0_0:
4365 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4366 		break;
4367 
4368 	case GAUDI_QUEUE_ID_TPC_0_1:
4369 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4370 		break;
4371 
4372 	case GAUDI_QUEUE_ID_TPC_0_2:
4373 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4374 		break;
4375 
4376 	case GAUDI_QUEUE_ID_TPC_0_3:
4377 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4378 		break;
4379 
4380 	case GAUDI_QUEUE_ID_TPC_1_0:
4381 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4382 		break;
4383 
4384 	case GAUDI_QUEUE_ID_TPC_1_1:
4385 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4386 		break;
4387 
4388 	case GAUDI_QUEUE_ID_TPC_1_2:
4389 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4390 		break;
4391 
4392 	case GAUDI_QUEUE_ID_TPC_1_3:
4393 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4394 		break;
4395 
4396 	case GAUDI_QUEUE_ID_TPC_2_0:
4397 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4398 		break;
4399 
4400 	case GAUDI_QUEUE_ID_TPC_2_1:
4401 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4402 		break;
4403 
4404 	case GAUDI_QUEUE_ID_TPC_2_2:
4405 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4406 		break;
4407 
4408 	case GAUDI_QUEUE_ID_TPC_2_3:
4409 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4410 		break;
4411 
4412 	case GAUDI_QUEUE_ID_TPC_3_0:
4413 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4414 		break;
4415 
4416 	case GAUDI_QUEUE_ID_TPC_3_1:
4417 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4418 		break;
4419 
4420 	case GAUDI_QUEUE_ID_TPC_3_2:
4421 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4422 		break;
4423 
4424 	case GAUDI_QUEUE_ID_TPC_3_3:
4425 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4426 		break;
4427 
4428 	case GAUDI_QUEUE_ID_TPC_4_0:
4429 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4430 		break;
4431 
4432 	case GAUDI_QUEUE_ID_TPC_4_1:
4433 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4434 		break;
4435 
4436 	case GAUDI_QUEUE_ID_TPC_4_2:
4437 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4438 		break;
4439 
4440 	case GAUDI_QUEUE_ID_TPC_4_3:
4441 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4442 		break;
4443 
4444 	case GAUDI_QUEUE_ID_TPC_5_0:
4445 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4446 		break;
4447 
4448 	case GAUDI_QUEUE_ID_TPC_5_1:
4449 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4450 		break;
4451 
4452 	case GAUDI_QUEUE_ID_TPC_5_2:
4453 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4454 		break;
4455 
4456 	case GAUDI_QUEUE_ID_TPC_5_3:
4457 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4458 		break;
4459 
4460 	case GAUDI_QUEUE_ID_TPC_6_0:
4461 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4462 		break;
4463 
4464 	case GAUDI_QUEUE_ID_TPC_6_1:
4465 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4466 		break;
4467 
4468 	case GAUDI_QUEUE_ID_TPC_6_2:
4469 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4470 		break;
4471 
4472 	case GAUDI_QUEUE_ID_TPC_6_3:
4473 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4474 		break;
4475 
4476 	case GAUDI_QUEUE_ID_TPC_7_0:
4477 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4478 		break;
4479 
4480 	case GAUDI_QUEUE_ID_TPC_7_1:
4481 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4482 		break;
4483 
4484 	case GAUDI_QUEUE_ID_TPC_7_2:
4485 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4486 		break;
4487 
4488 	case GAUDI_QUEUE_ID_TPC_7_3:
4489 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4490 		break;
4491 
4492 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4493 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4494 			invalid_queue = true;
4495 
4496 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4497 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4498 		break;
4499 
4500 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4501 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4502 			invalid_queue = true;
4503 
4504 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4505 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4506 		break;
4507 
4508 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4509 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4510 			invalid_queue = true;
4511 
4512 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4513 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4514 		break;
4515 
4516 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4517 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4518 			invalid_queue = true;
4519 
4520 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4521 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4522 		break;
4523 
4524 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4525 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4526 			invalid_queue = true;
4527 
4528 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4529 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4530 		break;
4531 
4532 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4533 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4534 			invalid_queue = true;
4535 
4536 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4537 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4538 		break;
4539 
4540 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4541 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4542 			invalid_queue = true;
4543 
4544 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4545 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4546 		break;
4547 
4548 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4549 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4550 			invalid_queue = true;
4551 
4552 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4553 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4554 		break;
4555 
4556 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4557 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4558 			invalid_queue = true;
4559 
4560 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4561 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4562 		break;
4563 
4564 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4565 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4566 			invalid_queue = true;
4567 
4568 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4569 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4570 		break;
4571 
4572 	default:
4573 		invalid_queue = true;
4574 	}
4575 
4576 	if (invalid_queue) {
4577 		/* Should never get here */
4578 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4579 			hw_queue_id);
4580 		return;
4581 	}
4582 
4583 	db_value = pi;
4584 
4585 	/* ring the doorbell */
4586 	WREG32(db_reg_offset, db_value);
4587 
4588 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4589 		/* make sure device CPU will read latest data from host */
4590 		mb();
4591 
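		/* Raise the PI_UPDATE interrupt to notify the device CPU of the
		 * new PQ entry.
		 */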
4592 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4593 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4594 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4595 
4596 		WREG32(irq_handler_offset,
4597 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4598 	}
4599 }
4600 
4601 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4602 				struct hl_bd *bd)
4603 {
4604 	__le64 *pbd = (__le64 *) bd;
4605 
	/* The QMANs are in host memory, so a simple copy suffices */
4607 	pqe[0] = pbd[0];
4608 	pqe[1] = pbd[1];
4609 }
4610 
4611 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4612 					dma_addr_t *dma_handle, gfp_t flags)
4613 {
4614 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4615 						dma_handle, flags);
4616 
4617 	/* Shift to the device's base physical address of host memory */
4618 	if (kernel_addr)
4619 		*dma_handle += HOST_PHYS_BASE;
4620 
4621 	return kernel_addr;
4622 }
4623 
4624 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4625 		void *cpu_addr, dma_addr_t dma_handle)
4626 {
	/* Undo the shift by the device's base physical address of host memory */
4628 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4629 
4630 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4631 }
4632 
4633 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4634 {
4635 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4636 	u64 cur_addr = prop->dram_user_base_address;
4637 	u32 chunk_size, busy;
4638 	int rc, dma_id;
4639 
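	/* Split the DRAM range into chunks of up to 2GB, spread them across all
	 * DMA channels and use the DMA core's memset mode to fill each chunk
	 * with val.
	 */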
4640 	while (cur_addr < prop->dram_end_address) {
4641 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4642 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4643 
4644 			chunk_size =
4645 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4646 
4647 			dev_dbg(hdev->dev,
4648 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4649 				cur_addr, cur_addr + chunk_size);
4650 
4651 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4652 					lower_32_bits(val));
4653 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4654 					upper_32_bits(val));
4655 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4656 						lower_32_bits(cur_addr));
4657 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4658 						upper_32_bits(cur_addr));
4659 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4660 					chunk_size);
4661 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4662 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4663 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4664 
4665 			cur_addr += chunk_size;
4666 
4667 			if (cur_addr == prop->dram_end_address)
4668 				break;
4669 		}
4670 
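		/* Wait for all DMA channels to finish scrubbing this batch */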
4671 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4672 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4673 
4674 			rc = hl_poll_timeout(
4675 				hdev,
4676 				mmDMA0_CORE_STS0 + dma_offset,
4677 				busy,
4678 				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4679 				1000,
4680 				HBM_SCRUBBING_TIMEOUT_US);
4681 
4682 			if (rc) {
4683 				dev_err(hdev->dev,
4684 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4685 					dma_id);
4686 				return -EIO;
4687 			}
4688 		}
4689 	}
4690 
4691 	return 0;
4692 }
4693 
4694 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4695 {
4696 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4697 	u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4698 			min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4699 	u64 addr, size, val = hdev->memory_scrub_val;
4700 	ktime_t timeout;
4701 	int rc = 0;
4702 
4703 	if (!hdev->memory_scrub)
4704 		return 0;
4705 
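	/* Wait for the device to become idle before scrubbing SRAM and HBM */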
4706 	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4707 	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4708 		if (ktime_compare(ktime_get(), timeout) > 0) {
4709 			dev_err(hdev->dev, "waiting for idle timeout\n");
4710 			return -ETIMEDOUT;
4711 		}
4712 		usleep_range((1000 >> 2) + 1, 1000);
4713 	}
4714 
4715 	/* Scrub SRAM */
4716 	addr = prop->sram_user_base_address;
4717 	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4718 
4719 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4720 			addr, addr + size, val);
4721 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4722 	if (rc) {
4723 		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4724 		return rc;
4725 	}
4726 
4727 	/* Scrub HBM using all DMA channels in parallel */
4728 	rc = gaudi_scrub_device_dram(hdev, val);
4729 	if (rc) {
4730 		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4731 		return rc;
4732 	}
4733 
4734 	return 0;
4735 }
4736 
4737 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4738 				u32 queue_id, dma_addr_t *dma_handle,
4739 				u16 *queue_len)
4740 {
4741 	struct gaudi_device *gaudi = hdev->asic_specific;
4742 	struct gaudi_internal_qman_info *q;
4743 
4744 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4745 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4746 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4747 		return NULL;
4748 	}
4749 
4750 	q = &gaudi->internal_qmans[queue_id];
4751 	*dma_handle = q->pq_dma_addr;
4752 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4753 
4754 	return q->pq_kernel_addr;
4755 }
4756 
4757 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4758 				u16 len, u32 timeout, u64 *result)
4759 {
4760 	struct gaudi_device *gaudi = hdev->asic_specific;
4761 
4762 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4763 		if (result)
4764 			*result = 0;
4765 		return 0;
4766 	}
4767 
4768 	if (!timeout)
4769 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4770 
4771 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4772 						timeout, result);
4773 }
4774 
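/*
 * Sanity-test an external H/W queue: allocate a fence buffer and a MSG_PROT
 * packet from the DMA pool, send the packet on the queue without a completion
 * and poll the fence buffer until it holds the expected fence value.
 */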
4775 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4776 {
4777 	struct packet_msg_prot *fence_pkt;
4778 	dma_addr_t pkt_dma_addr;
4779 	u32 fence_val, tmp, timeout_usec;
4780 	dma_addr_t fence_dma_addr;
4781 	u32 *fence_ptr;
4782 	int rc;
4783 
4784 	if (hdev->pldm)
4785 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4786 	else
4787 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4788 
4789 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4790 
4791 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4792 	if (!fence_ptr) {
4793 		dev_err(hdev->dev,
4794 			"Failed to allocate memory for H/W queue %d testing\n",
4795 			hw_queue_id);
4796 		return -ENOMEM;
4797 	}
4798 
4799 	*fence_ptr = 0;
4800 
4801 	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4802 						&pkt_dma_addr);
4803 	if (!fence_pkt) {
4804 		dev_err(hdev->dev,
4805 			"Failed to allocate packet for H/W queue %d testing\n",
4806 			hw_queue_id);
4807 		rc = -ENOMEM;
4808 		goto free_fence_ptr;
4809 	}
4810 
4811 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4812 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4813 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4814 
4815 	fence_pkt->ctl = cpu_to_le32(tmp);
4816 	fence_pkt->value = cpu_to_le32(fence_val);
4817 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4818 
4819 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4820 					sizeof(struct packet_msg_prot),
4821 					pkt_dma_addr);
4822 	if (rc) {
4823 		dev_err(hdev->dev,
4824 			"Failed to send fence packet to H/W queue %d\n",
4825 			hw_queue_id);
4826 		goto free_pkt;
4827 	}
4828 
4829 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4830 					1000, timeout_usec, true);
4831 
4832 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4833 
4834 	if (rc == -ETIMEDOUT) {
4835 		dev_err(hdev->dev,
4836 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4837 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4838 		rc = -EIO;
4839 	}
4840 
4841 free_pkt:
4842 	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4843 free_fence_ptr:
4844 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4845 	return rc;
4846 }
4847 
4848 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4849 {
4850 	struct gaudi_device *gaudi = hdev->asic_specific;
4851 
4852 	/*
4853 	 * Check the capability here because send_cpu_message() won't update
4854 	 * the result value if the CPU queue capability is not set
4855 	 */
4856 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4857 		return 0;
4858 
4859 	return hl_fw_test_cpu_queue(hdev);
4860 }
4861 
4862 static int gaudi_test_queues(struct hl_device *hdev)
4863 {
4864 	int i, rc, ret_val = 0;
4865 
4866 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4867 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4868 			rc = gaudi_test_queue(hdev, i);
4869 			if (rc)
4870 				ret_val = -EINVAL;
4871 		}
4872 	}
4873 
4874 	rc = gaudi_test_cpu_queue(hdev);
4875 	if (rc)
4876 		ret_val = -EINVAL;
4877 
4878 	return ret_val;
4879 }
4880 
4881 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4882 		gfp_t mem_flags, dma_addr_t *dma_handle)
4883 {
4884 	void *kernel_addr;
4885 
4886 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4887 		return NULL;
4888 
4889 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4890 
4891 	/* Shift to the device's base physical address of host memory */
4892 	if (kernel_addr)
4893 		*dma_handle += HOST_PHYS_BASE;
4894 
4895 	return kernel_addr;
4896 }
4897 
4898 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4899 			dma_addr_t dma_addr)
4900 {
4901 	/* Cancel the device's base physical address of host memory */
4902 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4903 
4904 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4905 }
4906 
4907 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4908 					size_t size, dma_addr_t *dma_handle)
4909 {
4910 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4911 }
4912 
4913 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4914 						size_t size, void *vaddr)
4915 {
4916 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4917 }
4918 
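/*
 * Return how many bytes of LIN_DMA packets are needed to cover a DMA-mapped
 * SG table. Physically contiguous entries are merged up to
 * DMA_MAX_TRANSFER_SIZE, mirroring the merge logic in gaudi_patch_dma_packet()
 * so the size calculated here matches the packets generated there.
 */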
4919 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4920 {
4921 	struct scatterlist *sg, *sg_next_iter;
4922 	u32 count, dma_desc_cnt;
4923 	u64 len, len_next;
4924 	dma_addr_t addr, addr_next;
4925 
4926 	dma_desc_cnt = 0;
4927 
4928 	for_each_sgtable_dma_sg(sgt, sg, count) {
4929 		len = sg_dma_len(sg);
4930 		addr = sg_dma_address(sg);
4931 
4932 		if (len == 0)
4933 			break;
4934 
4935 		while ((count + 1) < sgt->nents) {
4936 			sg_next_iter = sg_next(sg);
4937 			len_next = sg_dma_len(sg_next_iter);
4938 			addr_next = sg_dma_address(sg_next_iter);
4939 
4940 			if (len_next == 0)
4941 				break;
4942 
4943 			if ((addr + len == addr_next) &&
4944 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4945 				len += len_next;
4946 				count++;
4947 				sg = sg_next_iter;
4948 			} else {
4949 				break;
4950 			}
4951 		}
4952 
4953 		dma_desc_cnt++;
4954 	}
4955 
4956 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4957 }
4958 
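/*
 * Pin the host buffer referenced by a user LIN_DMA packet (unless it is
 * already pinned for this job), DMA-map it and grow the patched CB size by
 * the number of descriptors needed to cover its SG list.
 */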
4959 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4960 				struct hl_cs_parser *parser,
4961 				struct packet_lin_dma *user_dma_pkt,
4962 				u64 addr, enum dma_data_direction dir)
4963 {
4964 	struct hl_userptr *userptr;
4965 	int rc;
4966 
4967 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4968 			parser->job_userptr_list, &userptr))
4969 		goto already_pinned;
4970 
4971 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4972 	if (!userptr)
4973 		return -ENOMEM;
4974 
4975 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4976 				userptr);
4977 	if (rc)
4978 		goto free_userptr;
4979 
4980 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4981 
4982 	rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4983 	if (rc) {
4984 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4985 		goto unpin_memory;
4986 	}
4987 
4988 	userptr->dma_mapped = true;
4989 	userptr->dir = dir;
4990 
4991 already_pinned:
4992 	parser->patched_cb_size +=
4993 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4994 
4995 	return 0;
4996 
4997 unpin_memory:
4998 	list_del(&userptr->job_node);
4999 	hl_unpin_host_memory(hdev, userptr);
5000 free_userptr:
5001 	kfree(userptr);
5002 	return rc;
5003 }
5004 
5005 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5006 				struct hl_cs_parser *parser,
5007 				struct packet_lin_dma *user_dma_pkt,
5008 				bool src_in_host)
5009 {
5010 	enum dma_data_direction dir;
5011 	bool skip_host_mem_pin = false, user_memset;
5012 	u64 addr;
5013 	int rc = 0;
5014 
5015 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5016 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5017 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5018 
5019 	if (src_in_host) {
5020 		if (user_memset)
5021 			skip_host_mem_pin = true;
5022 
5023 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5024 		dir = DMA_TO_DEVICE;
5025 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5026 	} else {
5027 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5028 		dir = DMA_FROM_DEVICE;
5029 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5030 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5031 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5032 	}
5033 
5034 	if (skip_host_mem_pin)
5035 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5036 	else
5037 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5038 						addr, dir);
5039 
5040 	return rc;
5041 }
5042 
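/*
 * Validate a user LIN_DMA packet when the MMU is disabled. Zero-sized
 * transfers are passed through as-is. Otherwise, the host-side address is
 * taken from the source for jobs on the DMA0 queues and from the destination
 * for the rest, and the host memory is pinned unless the packet is a memset.
 */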
5043 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5044 				struct hl_cs_parser *parser,
5045 				struct packet_lin_dma *user_dma_pkt)
5046 {
5047 	bool src_in_host = false;
5048 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5049 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5050 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5051 
5052 	dev_dbg(hdev->dev, "DMA packet details:\n");
5053 	dev_dbg(hdev->dev, "source == 0x%llx\n",
5054 				le64_to_cpu(user_dma_pkt->src_addr));
5055 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5056 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5057 
5058 	/*
5059 	 * Special handling for DMA with size 0. Bypass all validations
5060 	 * because no transactions will be done except for WR_COMP, which
5061 	 * is not a security issue
5062 	 */
5063 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
5064 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5065 		return 0;
5066 	}
5067 
5068 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5069 		src_in_host = true;
5070 
5071 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5072 						src_in_host);
5073 }
5074 
5075 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5076 					struct hl_cs_parser *parser,
5077 					struct packet_load_and_exe *user_pkt)
5078 {
5079 	u32 cfg;
5080 
5081 	cfg = le32_to_cpu(user_pkt->cfg);
5082 
5083 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5084 		dev_err(hdev->dev,
5085 			"User not allowed to use Load and Execute\n");
5086 		return -EPERM;
5087 	}
5088 
5089 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5090 
5091 	return 0;
5092 }
5093 
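/*
 * First pass over the user CB: validate each packet id and size, reject
 * packets the user is not allowed to submit and accumulate the size of the
 * patched CB that will be built from it (with MMU enabled, LIN_DMA packets
 * are copied as-is; without MMU they are expanded per SG entry).
 */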
5094 static int gaudi_validate_cb(struct hl_device *hdev,
5095 			struct hl_cs_parser *parser, bool is_mmu)
5096 {
5097 	u32 cb_parsed_length = 0;
5098 	int rc = 0;
5099 
5100 	parser->patched_cb_size = 0;
5101 
5102 	/* user_cb_size is more than 0 so the loop will always be executed */
5103 	while (cb_parsed_length < parser->user_cb_size) {
5104 		enum packet_id pkt_id;
5105 		u16 pkt_size;
5106 		struct gaudi_packet *user_pkt;
5107 
5108 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5109 
5110 		pkt_id = (enum packet_id) (
5111 				(le64_to_cpu(user_pkt->header) &
5112 				PACKET_HEADER_PACKET_ID_MASK) >>
5113 					PACKET_HEADER_PACKET_ID_SHIFT);
5114 
5115 		if (!validate_packet_id(pkt_id)) {
5116 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5117 			rc = -EINVAL;
5118 			break;
5119 		}
5120 
5121 		pkt_size = gaudi_packet_sizes[pkt_id];
5122 		cb_parsed_length += pkt_size;
5123 		if (cb_parsed_length > parser->user_cb_size) {
5124 			dev_err(hdev->dev,
5125 				"packet 0x%x is out of CB boundary\n", pkt_id);
5126 			rc = -EINVAL;
5127 			break;
5128 		}
5129 
5130 		switch (pkt_id) {
5131 		case PACKET_MSG_PROT:
5132 			dev_err(hdev->dev,
5133 				"User not allowed to use MSG_PROT\n");
5134 			rc = -EPERM;
5135 			break;
5136 
5137 		case PACKET_CP_DMA:
5138 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5139 			rc = -EPERM;
5140 			break;
5141 
5142 		case PACKET_STOP:
5143 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5144 			rc = -EPERM;
5145 			break;
5146 
5147 		case PACKET_WREG_BULK:
5148 			dev_err(hdev->dev,
5149 				"User not allowed to use WREG_BULK\n");
5150 			rc = -EPERM;
5151 			break;
5152 
5153 		case PACKET_LOAD_AND_EXE:
5154 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5155 				(struct packet_load_and_exe *) user_pkt);
5156 			break;
5157 
5158 		case PACKET_LIN_DMA:
5159 			parser->contains_dma_pkt = true;
5160 			if (is_mmu)
5161 				parser->patched_cb_size += pkt_size;
5162 			else
5163 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5164 					(struct packet_lin_dma *) user_pkt);
5165 			break;
5166 
5167 		case PACKET_WREG_32:
5168 		case PACKET_MSG_LONG:
5169 		case PACKET_MSG_SHORT:
5170 		case PACKET_REPEAT:
5171 		case PACKET_FENCE:
5172 		case PACKET_NOP:
5173 		case PACKET_ARB_POINT:
5174 			parser->patched_cb_size += pkt_size;
5175 			break;
5176 
5177 		default:
5178 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5179 				pkt_id);
5180 			rc = -EINVAL;
5181 			break;
5182 		}
5183 
5184 		if (rc)
5185 			break;
5186 	}
5187 
5188 	/*
5189 	 * The new CB should have space at the end for:
5190 	 * 1. Optional NOP padding for cacheline alignment
5191 	 * 2. A MSG_PROT packet that will act as the completion packet
5192 	 * 3. A MSG_PROT packet that will generate the MSI interrupt
5193 	 */
5194 	if (parser->completion)
5195 		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5196 			parser->patched_cb_size);
5197 
5198 	return rc;
5199 }
5200 
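/*
 * Expand a single user LIN_DMA packet that references host memory into one
 * LIN_DMA packet per (merged) SG entry of the pinned buffer. The engine
 * barrier is kept only on the first descriptor, WR_COMP is cleared on all of
 * them and the user's original WR_COMP setting is restored on the last one.
 * A host-side memset is copied through unchanged.
 */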
5201 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5202 				struct hl_cs_parser *parser,
5203 				struct packet_lin_dma *user_dma_pkt,
5204 				struct packet_lin_dma *new_dma_pkt,
5205 				u32 *new_dma_pkt_size)
5206 {
5207 	struct hl_userptr *userptr;
5208 	struct scatterlist *sg, *sg_next_iter;
5209 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5210 	u64 len, len_next;
5211 	dma_addr_t dma_addr, dma_addr_next;
5212 	u64 device_memory_addr, addr;
5213 	enum dma_data_direction dir;
5214 	struct sg_table *sgt;
5215 	bool src_in_host = false;
5216 	bool skip_host_mem_pin = false;
5217 	bool user_memset;
5218 
5219 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5220 
5221 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5222 		src_in_host = true;
5223 
5224 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5225 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5226 
5227 	if (src_in_host) {
5228 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5229 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5230 		dir = DMA_TO_DEVICE;
5231 		if (user_memset)
5232 			skip_host_mem_pin = true;
5233 	} else {
5234 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5235 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5236 		dir = DMA_FROM_DEVICE;
5237 	}
5238 
5239 	if ((!skip_host_mem_pin) &&
5240 		(!hl_userptr_is_pinned(hdev, addr,
5241 					le32_to_cpu(user_dma_pkt->tsize),
5242 					parser->job_userptr_list, &userptr))) {
5243 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5244 				addr, le32_to_cpu(user_dma_pkt->tsize));
5245 		return -EFAULT;
5246 	}
5247 
5248 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5249 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5250 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5251 		return 0;
5252 	}
5253 
5254 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5255 
5256 	sgt = userptr->sgt;
5257 	dma_desc_cnt = 0;
5258 
5259 	for_each_sgtable_dma_sg(sgt, sg, count) {
5260 		len = sg_dma_len(sg);
5261 		dma_addr = sg_dma_address(sg);
5262 
5263 		if (len == 0)
5264 			break;
5265 
5266 		while ((count + 1) < sgt->nents) {
5267 			sg_next_iter = sg_next(sg);
5268 			len_next = sg_dma_len(sg_next_iter);
5269 			dma_addr_next = sg_dma_address(sg_next_iter);
5270 
5271 			if (len_next == 0)
5272 				break;
5273 
5274 			if ((dma_addr + len == dma_addr_next) &&
5275 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5276 				len += len_next;
5277 				count++;
5278 				sg = sg_next_iter;
5279 			} else {
5280 				break;
5281 			}
5282 		}
5283 
5284 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5285 		if (likely(dma_desc_cnt))
5286 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5287 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5288 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5289 		new_dma_pkt->tsize = cpu_to_le32(len);
5290 
5291 		if (dir == DMA_TO_DEVICE) {
5292 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5293 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5294 		} else {
5295 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5296 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5297 		}
5298 
5299 		if (!user_memset)
5300 			device_memory_addr += len;
5301 		dma_desc_cnt++;
5302 		new_dma_pkt++;
5303 	}
5304 
5305 	if (!dma_desc_cnt) {
5306 		dev_err(hdev->dev,
5307 			"No SG entries found when patching DMA packet\n");
5308 		return -EFAULT;
5309 	}
5310 
5311 	/* Fix the last dma packet - wrcomp must be as user set it */
5312 	new_dma_pkt--;
5313 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5314 
5315 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5316 
5317 	return 0;
5318 }
5319 
5320 static int gaudi_patch_cb(struct hl_device *hdev,
5321 				struct hl_cs_parser *parser)
5322 {
5323 	u32 cb_parsed_length = 0;
5324 	u32 cb_patched_cur_length = 0;
5325 	int rc = 0;
5326 
5327 	/* user_cb_size is more than 0 so the loop will always be executed */
5328 	while (cb_parsed_length < parser->user_cb_size) {
5329 		enum packet_id pkt_id;
5330 		u16 pkt_size;
5331 		u32 new_pkt_size = 0;
5332 		struct gaudi_packet *user_pkt, *kernel_pkt;
5333 
5334 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5335 		kernel_pkt = parser->patched_cb->kernel_address +
5336 					cb_patched_cur_length;
5337 
5338 		pkt_id = (enum packet_id) (
5339 				(le64_to_cpu(user_pkt->header) &
5340 				PACKET_HEADER_PACKET_ID_MASK) >>
5341 					PACKET_HEADER_PACKET_ID_SHIFT);
5342 
5343 		if (!validate_packet_id(pkt_id)) {
5344 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5345 			rc = -EINVAL;
5346 			break;
5347 		}
5348 
5349 		pkt_size = gaudi_packet_sizes[pkt_id];
5350 		cb_parsed_length += pkt_size;
5351 		if (cb_parsed_length > parser->user_cb_size) {
5352 			dev_err(hdev->dev,
5353 				"packet 0x%x is out of CB boundary\n", pkt_id);
5354 			rc = -EINVAL;
5355 			break;
5356 		}
5357 
5358 		switch (pkt_id) {
5359 		case PACKET_LIN_DMA:
5360 			rc = gaudi_patch_dma_packet(hdev, parser,
5361 					(struct packet_lin_dma *) user_pkt,
5362 					(struct packet_lin_dma *) kernel_pkt,
5363 					&new_pkt_size);
5364 			cb_patched_cur_length += new_pkt_size;
5365 			break;
5366 
5367 		case PACKET_MSG_PROT:
5368 			dev_err(hdev->dev,
5369 				"User not allowed to use MSG_PROT\n");
5370 			rc = -EPERM;
5371 			break;
5372 
5373 		case PACKET_CP_DMA:
5374 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5375 			rc = -EPERM;
5376 			break;
5377 
5378 		case PACKET_STOP:
5379 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5380 			rc = -EPERM;
5381 			break;
5382 
5383 		case PACKET_WREG_32:
5384 		case PACKET_WREG_BULK:
5385 		case PACKET_MSG_LONG:
5386 		case PACKET_MSG_SHORT:
5387 		case PACKET_REPEAT:
5388 		case PACKET_FENCE:
5389 		case PACKET_NOP:
5390 		case PACKET_ARB_POINT:
5391 		case PACKET_LOAD_AND_EXE:
5392 			memcpy(kernel_pkt, user_pkt, pkt_size);
5393 			cb_patched_cur_length += pkt_size;
5394 			break;
5395 
5396 		default:
5397 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5398 				pkt_id);
5399 			rc = -EINVAL;
5400 			break;
5401 		}
5402 
5403 		if (rc)
5404 			break;
5405 	}
5406 
5407 	return rc;
5408 }
5409 
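/*
 * Parse a user CB when the MMU is enabled. Host addresses are already mapped,
 * so the user CB is copied verbatim into a kernel-allocated CB (leaving room
 * for the end-of-CB packets) and then only validated; no per-packet patching
 * is needed.
 */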
5410 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5411 		struct hl_cs_parser *parser)
5412 {
5413 	u64 handle;
5414 	u32 patched_cb_size;
5415 	struct hl_cb *user_cb;
5416 	int rc;
5417 
5418 	/*
5419 	 * The new CB should have space at the end for:
5420 	 * 1. Optional NOP padding for cacheline alignment
5421 	 * 2. A MSG_PROT packet that will act as the completion packet
5422 	 * 3. A MSG_PROT packet that will generate the MSI interrupt
5423 	 */
5424 	if (parser->completion)
5425 		parser->patched_cb_size = parser->user_cb_size +
5426 				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5427 	else
5428 		parser->patched_cb_size = parser->user_cb_size;
5429 
5430 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5431 				parser->patched_cb_size, false, false,
5432 				&handle);
5433 
5434 	if (rc) {
5435 		dev_err(hdev->dev,
5436 			"Failed to allocate patched CB for DMA CS %d\n",
5437 			rc);
5438 		return rc;
5439 	}
5440 
5441 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5442 	/* hl_cb_get should never fail */
5443 	if (!parser->patched_cb) {
5444 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5445 		rc = -EFAULT;
5446 		goto out;
5447 	}
5448 
5449 	/*
5450 	 * We are protected from overflow because the check
5451 	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5452 	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5453 	 *
5454 	 * There is no option to reach here without going through that check because:
5455 	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5456 	 *    an external queue.
5457 	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5458 	 */
5459 	memcpy(parser->patched_cb->kernel_address,
5460 		parser->user_cb->kernel_address,
5461 		parser->user_cb_size);
5462 
5463 	patched_cb_size = parser->patched_cb_size;
5464 
5465 	/* Validate patched CB instead of user CB */
5466 	user_cb = parser->user_cb;
5467 	parser->user_cb = parser->patched_cb;
5468 	rc = gaudi_validate_cb(hdev, parser, true);
5469 	parser->user_cb = user_cb;
5470 
5471 	if (rc) {
5472 		hl_cb_put(parser->patched_cb);
5473 		goto out;
5474 	}
5475 
5476 	if (patched_cb_size != parser->patched_cb_size) {
5477 		dev_err(hdev->dev, "user CB size mismatch\n");
5478 		hl_cb_put(parser->patched_cb);
5479 		rc = -EINVAL;
5480 		goto out;
5481 	}
5482 
5483 out:
5484 	/*
5485 	 * Always call cb destroy here because we still have one reference
5486 	 * to it from the earlier cb_get. After the job completes, cb_put
5487 	 * will release it, but here we want to remove it from the
5488 	 * idr
5489 	 */
5490 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5491 
5492 	return rc;
5493 }
5494 
5495 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5496 		struct hl_cs_parser *parser)
5497 {
5498 	u64 handle;
5499 	int rc;
5500 
5501 	rc = gaudi_validate_cb(hdev, parser, false);
5502 
5503 	if (rc)
5504 		goto free_userptr;
5505 
5506 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5507 				parser->patched_cb_size, false, false,
5508 				&handle);
5509 	if (rc) {
5510 		dev_err(hdev->dev,
5511 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5512 		goto free_userptr;
5513 	}
5514 
5515 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5516 	/* hl_cb_get should never fail here */
5517 	if (!parser->patched_cb) {
5518 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5519 		rc = -EFAULT;
5520 		goto out;
5521 	}
5522 
5523 	rc = gaudi_patch_cb(hdev, parser);
5524 
5525 	if (rc)
5526 		hl_cb_put(parser->patched_cb);
5527 
5528 out:
5529 	/*
5530 	 * Always call cb destroy here because we still have one reference
5531 	 * to it from the earlier cb_get. After the job completes, cb_put
5532 	 * will release it, but here we want to remove it from the
5533 	 * idr
5534 	 */
5535 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5536 
5537 free_userptr:
5538 	if (rc)
5539 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5540 	return rc;
5541 }
5542 
5543 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5544 					struct hl_cs_parser *parser)
5545 {
5546 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5547 	struct gaudi_device *gaudi = hdev->asic_specific;
5548 	u32 nic_queue_offset, nic_mask_q_id;
5549 
5550 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5551 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5552 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5553 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5554 
5555 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5556 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5557 			return -EINVAL;
5558 		}
5559 	}
5560 
5561 	/* For internal queue jobs just check if CB address is valid */
5562 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5563 					parser->user_cb_size,
5564 					asic_prop->sram_user_base_address,
5565 					asic_prop->sram_end_address))
5566 		return 0;
5567 
5568 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5569 					parser->user_cb_size,
5570 					asic_prop->dram_user_base_address,
5571 					asic_prop->dram_end_address))
5572 		return 0;
5573 
5574 	/* PMMU and HPMMU addresses are equal, check only one of them */
5575 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5576 					parser->user_cb_size,
5577 					asic_prop->pmmu.start_addr,
5578 					asic_prop->pmmu.end_addr))
5579 		return 0;
5580 
5581 	dev_err(hdev->dev,
5582 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5583 		parser->user_cb, parser->user_cb_size);
5584 
5585 	return -EFAULT;
5586 }
5587 
5588 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5589 {
5590 	struct gaudi_device *gaudi = hdev->asic_specific;
5591 
5592 	if (parser->queue_type == QUEUE_TYPE_INT)
5593 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5594 
5595 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5596 		return gaudi_parse_cb_mmu(hdev, parser);
5597 	else
5598 		return gaudi_parse_cb_no_mmu(hdev, parser);
5599 }
5600 
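/*
 * Append the end-of-CB packets: NOP padding from the end of the original CB
 * up to the last two MSG_PROT packets, the first of which writes the
 * completion value to the CQ and the second of which triggers the MSI.
 */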
5601 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5602 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5603 				u32 msi_vec, bool eb)
5604 {
5605 	struct gaudi_device *gaudi = hdev->asic_specific;
5606 	struct packet_msg_prot *cq_pkt;
5607 	struct packet_nop *cq_padding;
5608 	u64 msi_addr;
5609 	u32 tmp;
5610 
5611 	cq_padding = kernel_address + original_len;
5612 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5613 
5614 	while ((void *)cq_padding < (void *)cq_pkt) {
5615 		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5616 		cq_padding++;
5617 	}
5618 
5619 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5620 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5621 
5622 	if (eb)
5623 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5624 
5625 	cq_pkt->ctl = cpu_to_le32(tmp);
5626 	cq_pkt->value = cpu_to_le32(cq_val);
5627 	cq_pkt->addr = cpu_to_le64(cq_addr);
5628 
5629 	cq_pkt++;
5630 
5631 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5632 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5633 	cq_pkt->ctl = cpu_to_le32(tmp);
5634 	cq_pkt->value = cpu_to_le32(1);
5635 
5636 	if (gaudi->multi_msi_mode)
5637 		msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5638 	else
5639 		msi_addr = mmPCIE_CORE_MSI_REQ;
5640 
5641 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5642 }
5643 
5644 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5645 {
5646 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5647 }
5648 
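/*
 * Fill a device memory range with a 64-bit value by building a single memset
 * LIN_DMA packet in a kernel CB and executing it on DMA channel 0 through
 * QMAN0. The DMA error cause register is checked before and after the
 * transfer and cleared during initialization.
 */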
5649 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5650 					u32 size, u64 val)
5651 {
5652 	struct packet_lin_dma *lin_dma_pkt;
5653 	struct hl_cs_job *job;
5654 	u32 cb_size, ctl, err_cause;
5655 	struct hl_cb *cb;
5656 	int rc;
5657 
5658 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5659 	if (!cb)
5660 		return -EFAULT;
5661 
5662 	lin_dma_pkt = cb->kernel_address;
5663 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5664 	cb_size = sizeof(*lin_dma_pkt);
5665 
5666 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5667 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5668 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5669 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5670 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5671 
5672 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5673 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5674 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5675 	lin_dma_pkt->tsize = cpu_to_le32(size);
5676 
5677 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5678 	if (!job) {
5679 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5680 		rc = -ENOMEM;
5681 		goto release_cb;
5682 	}
5683 
5684 	/* Verify DMA is OK */
5685 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5686 	if (err_cause && !hdev->init_done) {
5687 		dev_dbg(hdev->dev,
5688 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5689 			err_cause);
5690 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5691 	}
5692 
5693 	job->id = 0;
5694 	job->user_cb = cb;
5695 	atomic_inc(&job->user_cb->cs_cnt);
5696 	job->user_cb_size = cb_size;
5697 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5698 	job->patched_cb = job->user_cb;
5699 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5700 
5701 	hl_debugfs_add_job(hdev, job);
5702 
5703 	rc = gaudi_send_job_on_qman0(hdev, job);
5704 	hl_debugfs_remove_job(hdev, job);
5705 	kfree(job);
5706 	atomic_dec(&cb->cs_cnt);
5707 
5708 	/* Verify DMA is OK */
5709 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5710 	if (err_cause) {
5711 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5712 		rc = -EIO;
5713 		if (!hdev->init_done) {
5714 			dev_dbg(hdev->dev,
5715 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5716 				err_cause);
5717 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5718 		}
5719 	}
5720 
5721 release_cb:
5722 	hl_cb_put(cb);
5723 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5724 
5725 	return rc;
5726 }
5727 
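/*
 * Write the same value to num_regs consecutive registers by building a CB of
 * MSG_LONG packets, one per register, and sending it on QMAN0. Used below to
 * reset the sync manager SOB and monitor blocks.
 */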
5728 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5729 					u32 num_regs, u32 val)
5730 {
5731 	struct packet_msg_long *pkt;
5732 	struct hl_cs_job *job;
5733 	u32 cb_size, ctl;
5734 	struct hl_cb *cb;
5735 	int i, rc;
5736 
5737 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5738 
5739 	if (cb_size > SZ_2M) {
5740 		dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5741 		return -ENOMEM;
5742 	}
5743 
5744 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5745 	if (!cb)
5746 		return -EFAULT;
5747 
5748 	pkt = cb->kernel_address;
5749 
5750 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5751 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5752 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5753 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5754 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5755 
5756 	for (i = 0; i < num_regs ; i++, pkt++) {
5757 		pkt->ctl = cpu_to_le32(ctl);
5758 		pkt->value = cpu_to_le32(val);
5759 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5760 	}
5761 
5762 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5763 	if (!job) {
5764 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5765 		rc = -ENOMEM;
5766 		goto release_cb;
5767 	}
5768 
5769 	job->id = 0;
5770 	job->user_cb = cb;
5771 	atomic_inc(&job->user_cb->cs_cnt);
5772 	job->user_cb_size = cb_size;
5773 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5774 	job->patched_cb = job->user_cb;
5775 	job->job_cb_size = cb_size;
5776 
5777 	hl_debugfs_add_job(hdev, job);
5778 
5779 	rc = gaudi_send_job_on_qman0(hdev, job);
5780 	hl_debugfs_remove_job(hdev, job);
5781 	kfree(job);
5782 	atomic_dec(&cb->cs_cnt);
5783 
5784 release_cb:
5785 	hl_cb_put(cb);
5786 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5787 
5788 	return rc;
5789 }
5790 
5791 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5792 {
5793 	u64 base_addr;
5794 	u32 num_regs;
5795 	int rc;
5796 
5797 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5798 	num_regs = NUM_OF_SOB_IN_BLOCK;
5799 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5800 	if (rc) {
5801 		dev_err(hdev->dev, "failed resetting SM registers\n");
5802 		return -ENOMEM;
5803 	}
5804 
5805 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5806 	num_regs = NUM_OF_SOB_IN_BLOCK;
5807 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5808 	if (rc) {
5809 		dev_err(hdev->dev, "failed resetting SM registers\n");
5810 		return -ENOMEM;
5811 	}
5812 
5813 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5814 	num_regs = NUM_OF_SOB_IN_BLOCK;
5815 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5816 	if (rc) {
5817 		dev_err(hdev->dev, "failed resetting SM registers\n");
5818 		return -ENOMEM;
5819 	}
5820 
5821 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5822 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5823 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5824 	if (rc) {
5825 		dev_err(hdev->dev, "failed resetting SM registers\n");
5826 		return -ENOMEM;
5827 	}
5828 
5829 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5830 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5831 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5832 	if (rc) {
5833 		dev_err(hdev->dev, "failed resetting SM registers\n");
5834 		return -ENOMEM;
5835 	}
5836 
5837 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5838 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5839 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5840 	if (rc) {
5841 		dev_err(hdev->dev, "failed resetting SM registers\n");
5842 		return -ENOMEM;
5843 	}
5844 
5845 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5846 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5847 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5848 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5849 	if (rc) {
5850 		dev_err(hdev->dev, "failed resetting SM registers\n");
5851 		return -ENOMEM;
5852 	}
5853 
5854 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5855 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5856 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5857 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5858 	if (rc) {
5859 		dev_err(hdev->dev, "failed resetting SM registers\n");
5860 		return -ENOMEM;
5861 	}
5862 
5863 	return 0;
5864 }
5865 
5866 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5867 {
5868 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5869 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5870 	int i;
5871 
5872 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5873 		u64 sob_addr = CFG_BASE +
5874 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5875 				(i * sob_delta);
5876 		u32 dma_offset = i * DMA_CORE_OFFSET;
5877 
5878 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5879 				lower_32_bits(sob_addr));
5880 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5881 				upper_32_bits(sob_addr));
5882 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5883 
5884 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5885 		 * modified by the user for SRAM reduction
5886 		 */
5887 		if (i > 1)
5888 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5889 								0x00000001);
5890 	}
5891 }
5892 
5893 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5894 {
5895 	u32 qman_offset;
5896 	int i;
5897 
5898 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5899 		qman_offset = i * DMA_QMAN_OFFSET;
5900 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5901 	}
5902 
5903 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5904 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5905 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5906 	}
5907 
5908 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5909 		qman_offset = i * TPC_QMAN_OFFSET;
5910 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5911 	}
5912 
5913 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5914 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5915 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5916 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5917 	}
5918 }
5919 
5920 static int gaudi_restore_user_registers(struct hl_device *hdev)
5921 {
5922 	int rc;
5923 
5924 	rc = gaudi_restore_sm_registers(hdev);
5925 	if (rc)
5926 		return rc;
5927 
5928 	gaudi_restore_dma_registers(hdev);
5929 	gaudi_restore_qm_registers(hdev);
5930 
5931 	return 0;
5932 }
5933 
5934 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5935 {
5936 	return 0;
5937 }
5938 
5939 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5940 {
5941 	u32 size = hdev->asic_prop.mmu_pgt_size +
5942 			hdev->asic_prop.mmu_cache_mng_size;
5943 	struct gaudi_device *gaudi = hdev->asic_specific;
5944 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5945 
5946 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5947 		return 0;
5948 
5949 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5950 }
5951 
5952 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5953 {
5954 
5955 }
5956 
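/*
 * Program a DMA core directly (bypassing its QMAN) to copy size_to_dma bytes
 * from device address 'addr' to the host buffer at 'dma_addr', then poll the
 * core status until it is no longer busy and check its error cause register.
 */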
5957 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5958 					u32 size_to_dma, dma_addr_t dma_addr)
5959 {
5960 	u32 err_cause, val;
5961 	u64 dma_offset;
5962 	int rc;
5963 
5964 	dma_offset = dma_id * DMA_CORE_OFFSET;
5965 
5966 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5967 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5968 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5969 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5970 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5971 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5972 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5973 
5974 	rc = hl_poll_timeout(
5975 		hdev,
5976 		mmDMA0_CORE_STS0 + dma_offset,
5977 		val,
5978 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5979 		0,
5980 		1000000);
5981 
5982 	if (rc) {
5983 		dev_err(hdev->dev,
5984 			"DMA %d timed out while reading from 0x%llx\n",
5985 			dma_id, addr);
5986 		return -EIO;
5987 	}
5988 
5989 	/* Verify DMA is OK */
5990 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5991 	if (err_cause) {
5992 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5993 		dev_dbg(hdev->dev,
5994 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5995 			err_cause);
5996 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5997 
5998 		return -EIO;
5999 	}
6000 
6001 	return 0;
6002 }
6003 
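/*
 * Read device memory into a host buffer for debugfs by borrowing an idle PCI
 * DMA engine: stop its QMAN CPs, temporarily set the DMA core protection bit
 * (see the TODO below), transfer the data in 2MB chunks through a coherent
 * bounce buffer and restore the engine state when done.
 */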
6004 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6005 				void *blob_addr)
6006 {
6007 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6008 	u32 qm_glbl_sts0, qm_cgm_sts;
6009 	u64 dma_offset, qm_offset;
6010 	dma_addr_t dma_addr;
6011 	void *kernel_addr;
6012 	bool is_eng_idle;
6013 	int rc = 0, dma_id;
6014 
6015 	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
6016 
6017 	if (!kernel_addr)
6018 		return -ENOMEM;
6019 
6020 	hdev->asic_funcs->hw_queues_lock(hdev);
6021 
6022 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6023 	dma_offset = dma_id * DMA_CORE_OFFSET;
6024 	qm_offset = dma_id * DMA_QMAN_OFFSET;
6025 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6026 	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6027 	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6028 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6029 		      IS_DMA_IDLE(dma_core_sts0);
6030 
6031 	if (!is_eng_idle) {
6032 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6033 		dma_offset = dma_id * DMA_CORE_OFFSET;
6034 		qm_offset = dma_id * DMA_QMAN_OFFSET;
6035 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6036 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6037 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6038 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6039 			      IS_DMA_IDLE(dma_core_sts0);
6040 
6041 		if (!is_eng_idle) {
6042 			dev_err_ratelimited(hdev->dev,
6043 				"Can't read via DMA because it is BUSY\n");
6044 			rc = -EAGAIN;
6045 			goto out;
6046 		}
6047 	}
6048 
6049 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6050 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6051 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6052 
6053 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6054 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6055 	 * ASID
6056 	 */
6057 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6058 
6059 	/* Verify DMA is OK */
6060 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6061 	if (err_cause) {
6062 		dev_dbg(hdev->dev,
6063 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
6064 			err_cause);
6065 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6066 	}
6067 
6068 	pos = 0;
6069 	size_left = size;
6070 	size_to_dma = SZ_2M;
6071 
6072 	while (size_left > 0) {
6073 
6074 		if (size_left < SZ_2M)
6075 			size_to_dma = size_left;
6076 
6077 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6078 						dma_addr);
6079 		if (rc)
6080 			break;
6081 
6082 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6083 
6084 		if (size_left <= SZ_2M)
6085 			break;
6086 
6087 		pos += SZ_2M;
6088 		addr += SZ_2M;
6089 		size_left -= SZ_2M;
6090 	}
6091 
6092 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6093 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6094 	 * ASID
6095 	 */
6096 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6097 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6098 
6099 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6100 
6101 out:
6102 	hdev->asic_funcs->hw_queues_unlock(hdev);
6103 
6104 	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6105 
6106 	return rc;
6107 }
6108 
6109 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6110 {
6111 	struct gaudi_device *gaudi = hdev->asic_specific;
6112 
6113 	if (hdev->reset_info.hard_reset_pending)
6114 		return U64_MAX;
6115 
6116 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6117 			(addr - gaudi->hbm_bar_cur_addr));
6118 }
6119 
6120 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6121 {
6122 	struct gaudi_device *gaudi = hdev->asic_specific;
6123 
6124 	if (hdev->reset_info.hard_reset_pending)
6125 		return;
6126 
6127 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6128 			(addr - gaudi->hbm_bar_cur_addr));
6129 }
6130 
6131 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6132 {
6133 	/* mask to zero the MMBP and ASID bits */
6134 	WREG32_AND(reg, ~0x7FF);
6135 	WREG32_OR(reg, asid);
6136 }
6137 
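/*
 * Program the given ASID into the QMAN, DMA core, TPC, MME, enabled NIC and
 * PSOC trace user registers so that transactions from all engines are
 * attributed to this context by the MMU. Each write also clears the MMU
 * bypass bit of the register (see gaudi_mmu_prepare_reg() above).
 */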
6138 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6139 {
6140 	struct gaudi_device *gaudi = hdev->asic_specific;
6141 
6142 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6143 		return;
6144 
6145 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6146 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6147 		return;
6148 	}
6149 
6150 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6151 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6152 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6153 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6154 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6155 
6156 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6157 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6158 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6159 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6160 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6161 
6162 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6163 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6164 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6165 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6166 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6167 
6168 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6169 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6170 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6171 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6172 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6173 
6174 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6175 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6176 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6177 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6178 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6179 
6180 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6181 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6182 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6183 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6184 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6185 
6186 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6187 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6188 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6189 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6190 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6191 
6192 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6193 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6194 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6195 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6196 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6197 
6198 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6199 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6200 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6201 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6202 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6203 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6204 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6205 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6206 
6207 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6208 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6209 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6210 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6211 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6212 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6213 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6214 
6215 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6216 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6217 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6218 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6219 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6220 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6221 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6222 
6223 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6224 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6225 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6226 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6227 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6228 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6229 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6230 
6231 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6232 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6233 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6234 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6235 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6236 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6237 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6238 
6239 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6240 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6241 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6242 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6243 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6244 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6245 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6246 
6247 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6248 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6249 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6250 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6251 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6252 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6253 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6254 
6255 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6256 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6257 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6258 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6259 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6260 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6261 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6262 
6263 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6264 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6265 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6266 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6267 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6268 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6269 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6270 
6271 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6272 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6273 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6274 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6275 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6276 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6277 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6278 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6279 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6280 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6281 
6282 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6283 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6284 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6285 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6286 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6287 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6288 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6289 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6290 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6291 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6292 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6293 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6294 
6295 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6296 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6297 				asid);
6298 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6299 				asid);
6300 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6301 				asid);
6302 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6303 				asid);
6304 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6305 				asid);
6306 	}
6307 
6308 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6309 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6310 				asid);
6311 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6312 				asid);
6313 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6314 				asid);
6315 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6316 				asid);
6317 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6318 				asid);
6319 	}
6320 
6321 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6322 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6323 				asid);
6324 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6325 				asid);
6326 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6327 				asid);
6328 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6329 				asid);
6330 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6331 				asid);
6332 	}
6333 
6334 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6335 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6336 				asid);
6337 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6338 				asid);
6339 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6340 				asid);
6341 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6342 				asid);
6343 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6344 				asid);
6345 	}
6346 
6347 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6348 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6349 				asid);
6350 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6351 				asid);
6352 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6353 				asid);
6354 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6355 				asid);
6356 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6357 				asid);
6358 	}
6359 
6360 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6361 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6362 				asid);
6363 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6364 				asid);
6365 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6366 				asid);
6367 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6368 				asid);
6369 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6370 				asid);
6371 	}
6372 
6373 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6374 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6375 				asid);
6376 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6377 				asid);
6378 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6379 				asid);
6380 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6381 				asid);
6382 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6383 				asid);
6384 	}
6385 
6386 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6387 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6388 				asid);
6389 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6390 				asid);
6391 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6392 				asid);
6393 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6394 				asid);
6395 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6396 				asid);
6397 	}
6398 
6399 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6400 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6401 				asid);
6402 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6403 				asid);
6404 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6405 				asid);
6406 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6407 				asid);
6408 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6409 				asid);
6410 	}
6411 
6412 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6413 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6414 				asid);
6415 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6416 				asid);
6417 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6418 				asid);
6419 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6420 				asid);
6421 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6422 				asid);
6423 	}
6424 
6425 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6426 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6427 }
6428 
6429 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6430 		struct hl_cs_job *job)
6431 {
6432 	struct packet_msg_prot *fence_pkt;
6433 	u32 *fence_ptr;
6434 	dma_addr_t fence_dma_addr;
6435 	struct hl_cb *cb;
6436 	u32 tmp, timeout, dma_offset;
6437 	int rc;
6438 
6439 	if (hdev->pldm)
6440 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6441 	else
6442 		timeout = HL_DEVICE_TIMEOUT_USEC;
6443 
6444 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6445 	if (!fence_ptr) {
6446 		dev_err(hdev->dev,
6447 			"Failed to allocate fence memory for QMAN0\n");
6448 		return -ENOMEM;
6449 	}
6450 
6451 	cb = job->patched_cb;
6452 
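	/*
	 * The patched CB ends with a MSG_PROT packet that writes
	 * GAUDI_QMAN0_FENCE_VAL to the fence buffer allocated above; the
	 * driver polls that buffer below to detect job completion.
	 */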
6453 	fence_pkt = cb->kernel_address +
6454 			job->job_cb_size - sizeof(struct packet_msg_prot);
6455 
6456 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6457 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6458 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6459 
6460 	fence_pkt->ctl = cpu_to_le32(tmp);
6461 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6462 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6463 
6464 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6465 
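	/*
	 * Temporarily mark the PCI DMA channel as secured (see the security
	 * scheme described at the top of this file) so QMAN0 can execute the
	 * driver's CB; the default protection value is restored after the job.
	 */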
6466 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6467 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6468 
6469 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6470 					job->job_cb_size, cb->bus_address);
6471 	if (rc) {
6472 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6473 		goto free_fence_ptr;
6474 	}
6475 
6476 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6477 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6478 				timeout, true);
6479 
6480 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6481 
6482 	if (rc == -ETIMEDOUT) {
6483 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6484 		goto free_fence_ptr;
6485 	}
6486 
6487 free_fence_ptr:
6488 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6489 
6490 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6491 	return rc;
6492 }
6493 
6494 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6495 {
6496 	if (event_type >= GAUDI_EVENT_SIZE)
6497 		goto event_not_supported;
6498 
6499 	if (!gaudi_irq_map_table[event_type].valid)
6500 		goto event_not_supported;
6501 
	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6503 
6504 	return;
6505 
6506 event_not_supported:
6507 	snprintf(desc, size, "N/A");
6508 }
6509 
6510 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6511 							bool is_write, u16 *engine_id_1,
6512 							u16 *engine_id_2)
6513 {
6514 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6515 
6516 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6517 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6518 
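	/*
	 * Each DMA_IF location is shared by two DMA cores, so both cores'
	 * ERR_CAUSE registers are read below to determine which core (or
	 * possibly both) initiated the RAZWI.
	 */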
6519 	switch (x_y) {
6520 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6521 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6522 		dma_id[0] = 0;
6523 		dma_id[1] = 2;
6524 		break;
6525 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6526 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6527 		dma_id[0] = 1;
6528 		dma_id[1] = 3;
6529 		break;
6530 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6531 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6532 		dma_id[0] = 4;
6533 		dma_id[1] = 6;
6534 		break;
6535 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6536 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6537 		dma_id[0] = 5;
6538 		dma_id[1] = 7;
6539 		break;
6540 	default:
6541 		goto unknown_initiator;
6542 	}
6543 
6544 	for (i = 0 ; i < 2 ; i++) {
6545 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6546 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6547 	}
6548 
6549 	switch (x_y) {
6550 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6551 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6552 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6553 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6554 			return "DMA0";
6555 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6556 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6557 			return "DMA2";
6558 		} else {
6559 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6560 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6561 			return "DMA0 or DMA2";
6562 		}
6563 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6564 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6565 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6566 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6567 			return "DMA1";
6568 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6569 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6570 			return "DMA3";
6571 		} else {
6572 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6573 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6574 			return "DMA1 or DMA3";
6575 		}
6576 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6577 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6578 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6579 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6580 			return "DMA4";
6581 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6582 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6583 			return "DMA6";
6584 		} else {
6585 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6586 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6587 			return "DMA4 or DMA6";
6588 		}
6589 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6590 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6591 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6592 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6593 			return "DMA5";
6594 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6595 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6596 			return "DMA7";
6597 		} else {
6598 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6599 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6600 			return "DMA5 or DMA7";
6601 		}
6602 	}
6603 
6604 unknown_initiator:
6605 	return "unknown initiator";
6606 }
6607 
6608 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6609 							u16 *engine_id_1, u16 *engine_id_2)
6610 {
6611 	u32 val, x_y, axi_id;
6612 
6613 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6614 				RREG32(mmMMU_UP_RAZWI_READ_ID);
6615 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6616 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6617 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6618 			RAZWI_INITIATOR_AXI_ID_SHIFT);
6619 
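	/*
	 * The X/Y coordinates identify the initiator's location on the chip
	 * grid; where several engines share a location, the AXI ID is used
	 * below to tell them apart.
	 */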
6620 	switch (x_y) {
6621 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6622 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6623 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6624 			return "TPC0";
6625 		}
6626 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6627 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6628 			return "NIC0";
6629 		}
6630 		break;
6631 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6632 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6633 		return "TPC1";
6634 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6635 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6636 		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6637 		return "MME0";
6638 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6639 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6640 		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6641 		return "MME1";
6642 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6643 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6644 		return "TPC2";
6645 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6646 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6647 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6648 			return "TPC3";
6649 		}
		/* PCI, CPU and PSOC do not have an engine id */
6651 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6652 			return "PCI";
6653 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6654 			return "CPU";
6655 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6656 			return "PSOC";
6657 		break;
6658 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6659 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6660 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6661 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6662 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6663 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6664 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6665 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6666 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6667 				engine_id_1, engine_id_2);
6668 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6669 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6670 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6671 			return "TPC4";
6672 		}
6673 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6674 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6675 			return "NIC1";
6676 		}
6677 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6678 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6679 			return "NIC2";
6680 		}
6681 		break;
6682 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6683 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6684 		return "TPC5";
6685 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6686 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6687 		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6688 		return "MME2";
6689 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6690 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6691 		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6692 		return "MME3";
6693 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6694 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6695 		return "TPC6";
6696 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6697 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6698 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6699 			return "TPC7";
6700 		}
6701 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6702 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6703 			return "NIC4";
6704 		}
6705 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6706 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6707 			return "NIC5";
6708 		}
6709 		break;
6710 	default:
6711 		break;
6712 	}
6713 
6714 	dev_err(hdev->dev,
6715 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6716 		val,
6717 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6718 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6719 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6720 			RAZWI_INITIATOR_AXI_ID_MASK);
6721 
6722 	return "unknown initiator";
6723 }
6724 
6725 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6726 						u16 *engine_id_2, bool *is_read, bool *is_write)
6727 {
6729 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6730 		dev_err_ratelimited(hdev->dev,
6731 			"RAZWI event caused by illegal write of %s\n",
6732 			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6733 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6734 		*is_write = true;
6735 	}
6736 
6737 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6738 		dev_err_ratelimited(hdev->dev,
6739 			"RAZWI event caused by illegal read of %s\n",
6740 			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6741 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6742 		*is_read = true;
6743 	}
6744 }
6745 
6746 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6747 {
6748 	struct gaudi_device *gaudi = hdev->asic_specific;
6749 	u32 val;
6750 
6751 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6752 		return;
6753 
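	/*
	 * The capture registers latch bits 49:32 of the faulting VA together
	 * with a valid bit; the lower 32 bits are read from the companion VA
	 * register. Writing 0 clears the captured entry.
	 */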
6754 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6755 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6756 		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6757 		*addr <<= 32;
6758 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6759 
6760 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6761 		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6762 
6763 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6764 	}
6765 
6766 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6767 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6768 		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6769 		*addr <<= 32;
6770 		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6771 
6772 		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6773 
6774 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6775 	}
6776 }
6777 
6778 /*
6779  *  +-------------------+------------------------------------------------------+
6780  *  | Configuration Reg |                     Description                      |
6781  *  |      Address      |                                                      |
6782  *  +-------------------+------------------------------------------------------+
6783  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6784  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6785  *  |                   |0xF34 memory wrappers 63:32                           |
6786  *  |                   |0xF38 memory wrappers 95:64                           |
6787  *  |                   |0xF3C memory wrappers 127:96                          |
6788  *  +-------------------+------------------------------------------------------+
6789  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6790  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6791  *  |                   |0xF44 memory wrappers 63:32                           |
6792  *  |                   |0xF48 memory wrappers 95:64                           |
6793  *  |                   |0xF4C memory wrappers 127:96                          |
6794  *  +-------------------+------------------------------------------------------+
6795  */
6796 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6797 		struct ecc_info_extract_params *params, u64 *ecc_address,
6798 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6799 {
6800 	u32 i, num_mem_regs, reg, err_bit;
6801 	u64 err_addr, err_word = 0;
6802 
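	/* Each 32-bit indication register covers 32 memory wrappers, so round up */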
6803 	num_mem_regs = params->num_memories / 32 +
6804 			((params->num_memories % 32) ? 1 : 0);
6805 
6806 	if (params->block_address >= CFG_BASE)
6807 		params->block_address -= CFG_BASE;
6808 
6809 	if (params->derr)
6810 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6811 	else
6812 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6813 
6814 	/* Set invalid wrapper index */
6815 	*memory_wrapper_idx = 0xFF;
6816 
6817 	/* Iterate through memory wrappers, a single bit must be set */
6818 	for (i = 0 ; i < num_mem_regs ; i++) {
		err_word = RREG32(err_addr + i * 4);
6821 		if (err_word) {
6822 			err_bit = __ffs(err_word);
6823 			*memory_wrapper_idx = err_bit + (32 * i);
6824 			break;
6825 		}
6826 	}
6827 
6828 	if (*memory_wrapper_idx == 0xFF) {
6829 		dev_err(hdev->dev, "ECC error information cannot be found\n");
6830 		return -EINVAL;
6831 	}
6832 
6833 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6834 			*memory_wrapper_idx);
6835 
6836 	*ecc_address =
6837 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6838 	*ecc_syndrom =
6839 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6840 
6841 	/* Clear error indication */
6842 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6843 	if (params->derr)
6844 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6845 	else
6846 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6847 
6848 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6849 
6850 	return 0;
6851 }
6852 
6853 /*
6854  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6855  *
6856  * @idx: the current pi/ci value
6857  * @q_len: the queue length (power of 2)
6858  *
6859  * @return the cyclically decremented index
6860  */
6861 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6862 {
6863 	u32 mask = q_len - 1;
6864 
	/*
	 * Modular decrement is equivalent to adding (q_len - 1); masking with
	 * the LSBs keeps the result in the range [0, q_len - 1].
	 * For example, with q_len = 8: idx 0 wraps to 7 and idx 3 becomes 2.
	 */
6870 	return (idx + q_len - 1) & mask;
6871 }
6872 
6873 /**
6874  * gaudi_handle_sw_config_stream_data - print SW config stream data
6875  *
6876  * @hdev: pointer to the habanalabs device structure
6877  * @stream: the QMAN's stream
6878  * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the events that occurred
6880  */
6881 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6882 						u64 qman_base, u64 event_mask)
6883 {
6884 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6885 	u32 cq_ptr_lo_off, size;
6886 
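	/*
	 * cq_ptr_lo_off is the per-stream stride of the CQ registers; the
	 * addresses below are computed relative to the faulting QMAN's base,
	 * using the TPC0 QMAN block as the register-layout reference.
	 */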
6887 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6888 
6889 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6890 						stream * cq_ptr_lo_off;
6891 	cq_ptr_hi = cq_ptr_lo +
6892 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6893 	cq_tsize = cq_ptr_lo +
6894 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6895 
6896 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6897 	size = RREG32(cq_tsize);
6898 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6899 							stream, cq_ptr, size);
6900 
6901 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6902 		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6903 		hdev->captured_err_info.undef_opcode.cq_size = size;
6904 		hdev->captured_err_info.undef_opcode.stream_id = stream;
6905 	}
6906 }
6907 
6908 /**
6909  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6910  *
6911  * @hdev: pointer to the habanalabs device structure
6912  * @qid_base: first QID of the QMAN (out of 4 streams)
6913  * @stream: the QMAN's stream
6914  * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the events that occurred
6916  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6917  */
6918 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6919 						u32 stream, u64 qman_base,
6920 						u64 event_mask,
6921 						bool pr_sw_conf)
6922 {
6923 	u32 ci, qm_ci_stream_off, queue_len;
6924 	struct hl_hw_queue *q;
6925 	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6926 	int i;
6927 
6928 	q = &hdev->kernel_queues[qid_base + stream];
6929 
6930 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6931 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6932 						stream * qm_ci_stream_off;
6933 
6934 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6935 					q->int_queue_len : HL_QUEUE_LENGTH;
6936 
6937 	hdev->asic_funcs->hw_queues_lock(hdev);
6938 
6939 	if (pr_sw_conf)
6940 		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6941 
6942 	ci = RREG32(pq_ci);
6943 
	/* we should start printing from ci - 1 */
6945 	ci = gaudi_queue_idx_dec(ci, queue_len);
6946 	memset(addr, 0, sizeof(addr));
6947 
6948 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6949 		struct hl_bd *bd;
6950 		u32 len;
6951 
6952 		bd = q->kernel_address;
6953 		bd += ci;
6954 
6955 		len = le32_to_cpu(bd->len);
		/* len 0 means an uninitialized entry - break */
6957 		if (!len)
6958 			break;
6959 
6960 		addr[i] = le64_to_cpu(bd->ptr);
6961 
6962 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6963 							stream, ci, addr[i], len);
6964 
6965 		/* get previous ci, wrap if needed */
6966 		ci = gaudi_queue_idx_dec(ci, queue_len);
6967 	}
6968 
6969 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6970 		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6971 		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6972 
6973 		if (arr_idx == 0) {
6974 			undef_opcode->timestamp = ktime_get();
6975 			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6976 		}
6977 
6978 		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6979 		undef_opcode->cb_addr_streams_len++;
6980 	}
6981 
6982 	hdev->asic_funcs->hw_queues_unlock(hdev);
6983 }
6984 
6985 /**
6986  * handle_qman_data_on_err - extract QMAN data on error
6987  *
6988  * @hdev: pointer to the habanalabs device structure
6989  * @qid_base: first QID of the QMAN (out of 4 streams)
6990  * @stream: the QMAN's stream
6991  * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the events that occurred
 *
 * This function attempts to extract as much data as possible on a QMAN error.
 * On an upper CP, print the SW config stream data and the last 8 PQEs.
 * On the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
6997  */
6998 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6999 				   u32 stream, u64 qman_base, u64 event_mask)
7000 {
7001 	u32 i;
7002 
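	/*
	 * Streams 0..QMAN_STREAMS-1 are the upper CPs; a stream value equal to
	 * QMAN_STREAMS denotes the lower CP, which serves all four upper CPs.
	 */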
7003 	if (stream != QMAN_STREAMS) {
7004 		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
7005 			qman_base, event_mask, true);
7006 		return;
7007 	}
7008 
7009 	/* handle Lower-CP */
7010 	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
7011 
7012 	for (i = 0; i < QMAN_STREAMS; i++)
7013 		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
7014 			qman_base, event_mask, false);
7015 }
7016 
7017 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7018 					  const char *qm_name,
7019 					  u64 qman_base,
7020 					  u32 qid_base,
7021 					  u64 *event_mask)
7022 {
7023 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7024 	u64 glbl_sts_addr, arb_err_addr;
7025 	char reg_desc[32];
7026 
7027 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7028 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7029 
7030 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
7031 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7032 		glbl_sts_clr_val = 0;
7033 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7034 
7035 		if (!glbl_sts_val)
7036 			continue;
7037 
7038 		if (i == QMAN_STREAMS)
7039 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7040 		else
7041 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7042 
7043 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7044 			if (glbl_sts_val & BIT(j)) {
7045 				dev_err_ratelimited(hdev->dev,
7046 						"%s %s. err cause: %s\n",
7047 						qm_name, reg_desc,
7048 						gaudi_qman_error_cause[j]);
7049 				glbl_sts_clr_val |= BIT(j);
7050 			}
7051 		}
7052 		/* check for undefined opcode */
7053 		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
7054 				hdev->captured_err_info.undef_opcode.write_enable) {
7055 			memset(&hdev->captured_err_info.undef_opcode, 0,
7056 						sizeof(hdev->captured_err_info.undef_opcode));
7057 
7058 			hdev->captured_err_info.undef_opcode.write_enable = false;
7059 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7060 		}
7061 
7062 		/* Write 1 clear errors */
7063 		if (!hdev->stop_on_err)
7064 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7065 		else
7066 			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
7067 	}
7068 
7069 	arb_err_val = RREG32(arb_err_addr);
7070 
7071 	if (!arb_err_val)
7072 		return;
7073 
7074 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7075 		if (arb_err_val & BIT(j)) {
7076 			dev_err_ratelimited(hdev->dev,
7077 					"%s ARB_ERR. err cause: %s\n",
7078 					qm_name,
7079 					gaudi_qman_arb_error_cause[j]);
7080 		}
7081 	}
7082 }
7083 
7084 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7085 		struct hl_eq_sm_sei_data *sei_data)
7086 {
7087 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7088 
7089 	/* Flip the bits as the enum is ordered in the opposite way */
7090 	index = (index ^ 0x3) & 0x3;
7091 
7092 	switch (sei_data->sei_cause) {
7093 	case SM_SEI_SO_OVERFLOW:
7094 		dev_err_ratelimited(hdev->dev,
7095 			"%s SEI Error: SOB Group %u overflow/underflow",
7096 			gaudi_sync_manager_names[index],
7097 			le32_to_cpu(sei_data->sei_log));
7098 		break;
7099 	case SM_SEI_LBW_4B_UNALIGNED:
7100 		dev_err_ratelimited(hdev->dev,
7101 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7102 			gaudi_sync_manager_names[index],
7103 			le32_to_cpu(sei_data->sei_log));
7104 		break;
7105 	case SM_SEI_AXI_RESPONSE_ERR:
7106 		dev_err_ratelimited(hdev->dev,
7107 			"%s SEI Error: AXI ID %u response error",
7108 			gaudi_sync_manager_names[index],
7109 			le32_to_cpu(sei_data->sei_log));
7110 		break;
7111 	default:
7112 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7113 				le32_to_cpu(sei_data->sei_log));
7114 		break;
7115 	}
7116 }
7117 
7118 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7119 		struct hl_eq_ecc_data *ecc_data)
7120 {
7121 	struct ecc_info_extract_params params;
7122 	u64 ecc_address = 0, ecc_syndrom = 0;
7123 	u8 index, memory_wrapper_idx = 0;
7124 	bool extract_info_from_fw;
7125 	int rc;
7126 
7127 	if (hdev->asic_prop.fw_security_enabled) {
7128 		extract_info_from_fw = true;
7129 		goto extract_ecc_info;
7130 	}
7131 
7132 	switch (event_type) {
7133 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7134 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7135 		extract_info_from_fw = true;
7136 		break;
7137 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7138 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7139 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7140 		params.num_memories = 90;
7141 		params.derr = false;
7142 		extract_info_from_fw = false;
7143 		break;
7144 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7145 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7146 		params.block_address =
7147 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7148 		params.num_memories = 90;
7149 		params.derr = true;
7150 		extract_info_from_fw = false;
7151 		break;
7152 	case GAUDI_EVENT_MME0_ACC_SERR:
7153 	case GAUDI_EVENT_MME1_ACC_SERR:
7154 	case GAUDI_EVENT_MME2_ACC_SERR:
7155 	case GAUDI_EVENT_MME3_ACC_SERR:
7156 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7157 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7158 		params.num_memories = 128;
7159 		params.derr = false;
7160 		extract_info_from_fw = false;
7161 		break;
7162 	case GAUDI_EVENT_MME0_ACC_DERR:
7163 	case GAUDI_EVENT_MME1_ACC_DERR:
7164 	case GAUDI_EVENT_MME2_ACC_DERR:
7165 	case GAUDI_EVENT_MME3_ACC_DERR:
7166 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7167 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7168 		params.num_memories = 128;
7169 		params.derr = true;
7170 		extract_info_from_fw = false;
7171 		break;
7172 	case GAUDI_EVENT_MME0_SBAB_SERR:
7173 	case GAUDI_EVENT_MME1_SBAB_SERR:
7174 	case GAUDI_EVENT_MME2_SBAB_SERR:
7175 	case GAUDI_EVENT_MME3_SBAB_SERR:
7176 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7177 		params.block_address =
7178 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7179 		params.num_memories = 33;
7180 		params.derr = false;
7181 		extract_info_from_fw = false;
7182 		break;
7183 	case GAUDI_EVENT_MME0_SBAB_DERR:
7184 	case GAUDI_EVENT_MME1_SBAB_DERR:
7185 	case GAUDI_EVENT_MME2_SBAB_DERR:
7186 	case GAUDI_EVENT_MME3_SBAB_DERR:
7187 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7188 		params.block_address =
7189 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7190 		params.num_memories = 33;
7191 		params.derr = true;
7192 		extract_info_from_fw = false;
7193 		break;
7194 	default:
7195 		return;
7196 	}
7197 
7198 extract_ecc_info:
7199 	if (extract_info_from_fw) {
7200 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7201 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7202 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7203 	} else {
7204 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7205 				&ecc_syndrom, &memory_wrapper_idx);
7206 		if (rc)
7207 			return;
7208 	}
7209 
7210 	dev_err(hdev->dev,
		"ECC error detected. Address: %#llx. Syndrome: %#llx. Block id %u\n",
7212 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7213 }
7214 
7215 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7216 {
7217 	u64 qman_base;
7218 	char desc[32];
7219 	u32 qid_base;
7220 	u8 index;
7221 
7222 	switch (event_type) {
7223 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7224 		index = event_type - GAUDI_EVENT_TPC0_QM;
7225 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7226 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7227 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7228 		break;
7229 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7230 		if (event_type == GAUDI_EVENT_MME0_QM) {
7231 			index = 0;
7232 			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7233 		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7234 			index = 2;
7235 			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7236 		}
7237 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7238 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7239 		break;
7240 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7241 		index = event_type - GAUDI_EVENT_DMA0_QM;
7242 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7243 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7244 		if (index > 1)
7245 			qid_base++;
7246 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7247 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7248 		break;
7249 	case GAUDI_EVENT_NIC0_QM0:
7250 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7251 		qman_base = mmNIC0_QM0_BASE;
7252 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7253 		break;
7254 	case GAUDI_EVENT_NIC0_QM1:
7255 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7256 		qman_base = mmNIC0_QM1_BASE;
7257 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7258 		break;
7259 	case GAUDI_EVENT_NIC1_QM0:
7260 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7261 		qman_base = mmNIC1_QM0_BASE;
7262 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7263 		break;
7264 	case GAUDI_EVENT_NIC1_QM1:
7265 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7266 		qman_base = mmNIC1_QM1_BASE;
7267 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7268 		break;
7269 	case GAUDI_EVENT_NIC2_QM0:
7270 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7271 		qman_base = mmNIC2_QM0_BASE;
7272 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7273 		break;
7274 	case GAUDI_EVENT_NIC2_QM1:
7275 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7276 		qman_base = mmNIC2_QM1_BASE;
7277 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7278 		break;
7279 	case GAUDI_EVENT_NIC3_QM0:
7280 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7281 		qman_base = mmNIC3_QM0_BASE;
7282 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7283 		break;
7284 	case GAUDI_EVENT_NIC3_QM1:
7285 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7286 		qman_base = mmNIC3_QM1_BASE;
7287 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7288 		break;
7289 	case GAUDI_EVENT_NIC4_QM0:
7290 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7291 		qman_base = mmNIC4_QM0_BASE;
7292 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7293 		break;
7294 	case GAUDI_EVENT_NIC4_QM1:
7295 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7296 		qman_base = mmNIC4_QM1_BASE;
7297 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7298 		break;
7299 	default:
7300 		return;
7301 	}
7302 
7303 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7304 }
7305 
7306 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7307 					bool check_razwi, u64 *event_mask)
7308 {
7309 	bool is_read = false, is_write = false;
7310 	u16 engine_id[2], num_of_razwi_eng = 0;
7311 	char desc[64] = "";
7312 	u64 razwi_addr = 0;
7313 	u8 razwi_flags = 0;
7314 
	/*
	 * Initialize the engine IDs as invalid; they are assigned valid values
	 * only if the RAZWI was initiated by an engine that has an engine ID.
	 */
7319 	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7320 	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7321 
7322 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7323 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7324 		event_type, desc);
7325 
7326 	if (check_razwi) {
7327 		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7328 						&is_write);
7329 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7330 
7331 		if (is_read)
7332 			razwi_flags |= HL_RAZWI_READ;
7333 		if (is_write)
7334 			razwi_flags |= HL_RAZWI_WRITE;
7335 
7336 		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7337 			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7338 				num_of_razwi_eng = 2;
7339 			else
7340 				num_of_razwi_eng = 1;
7341 		}
7342 
7343 		if (razwi_flags)
7344 			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7345 					razwi_flags, event_mask);
7346 	}
7347 }
7348 
7349 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7350 					struct cpucp_pkt_sync_err *sync_err)
7351 {
7352 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7353 
7354 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7355 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7356 }
7357 
7358 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7359 					struct hl_eq_fw_alive *fw_alive)
7360 {
7361 	dev_err(hdev->dev,
7362 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7363 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7364 		le32_to_cpu(fw_alive->process_id),
7365 		le32_to_cpu(fw_alive->thread_id),
7366 		le64_to_cpu(fw_alive->uptime_seconds));
7367 }
7368 
7369 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7370 						void *data)
7371 {
7372 	char desc[64] = "", *type;
7373 	struct eq_nic_sei_event *eq_nic_sei = data;
7374 	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7375 
7376 	switch (eq_nic_sei->axi_error_cause) {
7377 	case RXB:
7378 		type = "RXB";
7379 		break;
7380 	case RXE:
7381 		type = "RXE";
7382 		break;
7383 	case TXS:
7384 		type = "TXS";
7385 		break;
7386 	case TXE:
7387 		type = "TXE";
7388 		break;
7389 	case QPC_RESP:
7390 		type = "QPC_RESP";
7391 		break;
7392 	case NON_AXI_ERR:
7393 		type = "NON_AXI_ERR";
7394 		break;
7395 	case TMR:
7396 		type = "TMR";
7397 		break;
7398 	default:
7399 		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7400 			eq_nic_sei->axi_error_cause);
7401 		type = "N/A";
7402 		break;
7403 	}
7404 
7405 	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7406 			eq_nic_sei->id);
7407 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7408 		event_type, desc);
7409 }
7410 
7411 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7412 {
7413 	/* GAUDI doesn't support any reset except hard-reset */
7414 	return -EPERM;
7415 }
7416 
7417 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7418 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7419 {
7420 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7421 	int rc = 0;
7422 
7423 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7424 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7425 		if (!hbm_ecc_data) {
			dev_err(hdev->dev, "No FW ECC data\n");
7427 			return 0;
7428 		}
7429 
7430 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7431 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7432 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7433 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7434 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7435 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7436 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7437 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7438 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7439 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7440 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7441 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7442 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7443 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7444 
7445 		dev_err(hdev->dev,
7446 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7447 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7448 		dev_err(hdev->dev,
7449 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7450 			device, ch, hbm_ecc_data->first_addr, type,
7451 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7452 			hbm_ecc_data->dec_cnt);
7453 		return 0;
7454 	}
7455 
7456 	if (hdev->asic_prop.fw_security_enabled) {
7457 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7458 		return 0;
7459 	}
7460 
7461 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
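	/*
	 * Each HBM channel has a 0x1000 register window; the raw offsets used
	 * below (0x060, 0x064, 0x06C, 0x070, 0x074, 0x07C) presumably follow
	 * the HBM memory controller register map, which has no symbolic
	 * definitions in this driver.
	 */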
7462 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7463 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7464 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7465 		if (val) {
7466 			rc = -EIO;
7467 			dev_err(hdev->dev,
7468 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7469 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7470 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7471 				(val >> 4) & 0x1);
7472 
7473 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7474 			dev_err(hdev->dev,
7475 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7476 				device, ch * 2,
7477 				RREG32(base + ch * 0x1000 + 0x064),
7478 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7479 				(val2 & 0xFF0000) >> 16,
7480 				(val2 & 0xFF000000) >> 24);
7481 		}
7482 
7483 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7484 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7485 		if (val) {
7486 			rc = -EIO;
7487 			dev_err(hdev->dev,
7488 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7489 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7490 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7491 				(val >> 4) & 0x1);
7492 
7493 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7494 			dev_err(hdev->dev,
7495 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7496 				device, ch * 2 + 1,
7497 				RREG32(base + ch * 0x1000 + 0x074),
7498 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7499 				(val2 & 0xFF0000) >> 16,
7500 				(val2 & 0xFF000000) >> 24);
7501 		}
7502 
7503 		/* Clear interrupts */
7504 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7505 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7506 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7507 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7508 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7509 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7510 	}
7511 
7512 	val  = RREG32(base + 0x8F30);
7513 	val2 = RREG32(base + 0x8F34);
7514 	if (val | val2) {
7515 		rc = -EIO;
7516 		dev_err(hdev->dev,
7517 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7518 			device, val, val2);
7519 	}
7520 	val  = RREG32(base + 0x8F40);
7521 	val2 = RREG32(base + 0x8F44);
7522 	if (val | val2) {
7523 		rc = -EIO;
7524 		dev_err(hdev->dev,
7525 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7526 			device, val, val2);
7527 	}
7528 
7529 	return rc;
7530 }
7531 
7532 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7533 {
7534 	switch (hbm_event_type) {
7535 	case GAUDI_EVENT_HBM0_SPI_0:
7536 	case GAUDI_EVENT_HBM0_SPI_1:
7537 		return 0;
7538 	case GAUDI_EVENT_HBM1_SPI_0:
7539 	case GAUDI_EVENT_HBM1_SPI_1:
7540 		return 1;
7541 	case GAUDI_EVENT_HBM2_SPI_0:
7542 	case GAUDI_EVENT_HBM2_SPI_1:
7543 		return 2;
7544 	case GAUDI_EVENT_HBM3_SPI_0:
7545 	case GAUDI_EVENT_HBM3_SPI_1:
7546 		return 3;
7547 	default:
7548 		break;
7549 	}
7550 
7551 	/* Should never happen */
7552 	return 0;
7553 }
7554 
7555 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7556 					char *interrupt_name)
7557 {
7558 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7559 	bool soft_reset_required = false;
7560 
7561 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7562 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7563 
7564 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7565 		if (tpc_interrupts_cause & BIT(i)) {
7566 			dev_err_ratelimited(hdev->dev,
7567 					"TPC%d_%s interrupt cause: %s\n",
7568 					tpc_id, interrupt_name,
7569 					gaudi_tpc_interrupts_cause[i]);
			/* If this is a QM error, a soft-reset is required */
7571 			if (i == 15)
7572 				soft_reset_required = true;
7573 		}
7574 
7575 	/* Clear interrupts */
7576 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7577 
7578 	return soft_reset_required;
7579 }
7580 
7581 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7582 {
7583 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7584 }
7585 
7586 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7587 {
7588 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7589 }
7590 
7591 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7592 {
7593 	ktime_t zero_time = ktime_set(0, 0);
7594 
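	/*
	 * The clk_throttling lock keeps the current/aggregated reason flags
	 * and the per-reason start/end timestamps consistent with each other.
	 */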
7595 	mutex_lock(&hdev->clk_throttling.lock);
7596 
7597 	switch (event_type) {
7598 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7599 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7600 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7601 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7602 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7603 		dev_info_ratelimited(hdev->dev,
7604 			"Clock throttling due to power consumption\n");
7605 		break;
7606 
7607 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7608 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7609 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7610 		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
7612 		break;
7613 
7614 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7615 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7616 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7617 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7618 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7619 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7620 		dev_info_ratelimited(hdev->dev,
7621 			"Clock throttling due to overheating\n");
7622 		break;
7623 
7624 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7625 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7626 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7627 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7628 		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
7630 		break;
7631 
7632 	default:
7633 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7634 			event_type);
7635 		break;
7636 	}
7637 
7638 	mutex_unlock(&hdev->clk_throttling.lock);
7639 }
7640 
7641 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7642 {
7643 	struct gaudi_device *gaudi = hdev->asic_specific;
7644 	struct hl_info_fw_err_info fw_err_info;
7645 	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7646 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7647 	u32 fw_fatal_err_flag = 0, flags = 0;
7648 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7649 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7650 	bool reset_required, reset_direct = false;
7651 	u8 cause;
7652 	int rc;
7653 
7654 	if (event_type >= GAUDI_EVENT_SIZE) {
7655 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7656 				event_type, GAUDI_EVENT_SIZE - 1);
7657 		return;
7658 	}
7659 
7660 	gaudi->events_stat[event_type]++;
7661 	gaudi->events_stat_aggregate[event_type]++;
7662 
7663 	switch (event_type) {
7664 	case GAUDI_EVENT_PCIE_CORE_DERR:
7665 	case GAUDI_EVENT_PCIE_IF_DERR:
7666 	case GAUDI_EVENT_PCIE_PHY_DERR:
7667 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7668 	case GAUDI_EVENT_MME0_ACC_DERR:
7669 	case GAUDI_EVENT_MME0_SBAB_DERR:
7670 	case GAUDI_EVENT_MME1_ACC_DERR:
7671 	case GAUDI_EVENT_MME1_SBAB_DERR:
7672 	case GAUDI_EVENT_MME2_ACC_DERR:
7673 	case GAUDI_EVENT_MME2_SBAB_DERR:
7674 	case GAUDI_EVENT_MME3_ACC_DERR:
7675 	case GAUDI_EVENT_MME3_SBAB_DERR:
7676 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7677 		fallthrough;
7678 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7679 	case GAUDI_EVENT_PSOC_MEM_DERR:
7680 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7681 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7682 	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7683 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7684 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7685 	case GAUDI_EVENT_MMU_DERR:
7686 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7687 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7688 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7689 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7690 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7691 		goto reset_device;
7692 
7693 	case GAUDI_EVENT_GIC500:
7694 	case GAUDI_EVENT_AXI_ECC:
7695 	case GAUDI_EVENT_L2_RAM_ECC:
7696 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7697 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7698 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7699 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7700 		goto reset_device;
7701 
7702 	case GAUDI_EVENT_HBM0_SPI_0:
7703 	case GAUDI_EVENT_HBM1_SPI_0:
7704 	case GAUDI_EVENT_HBM2_SPI_0:
7705 	case GAUDI_EVENT_HBM3_SPI_0:
7706 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7707 		gaudi_hbm_read_interrupts(hdev,
7708 				gaudi_hbm_event_to_dev(event_type),
7709 				&eq_entry->hbm_ecc_data);
7710 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7711 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7712 		goto reset_device;
7713 
7714 	case GAUDI_EVENT_HBM0_SPI_1:
7715 	case GAUDI_EVENT_HBM1_SPI_1:
7716 	case GAUDI_EVENT_HBM2_SPI_1:
7717 	case GAUDI_EVENT_HBM3_SPI_1:
7718 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7719 		gaudi_hbm_read_interrupts(hdev,
7720 				gaudi_hbm_event_to_dev(event_type),
7721 				&eq_entry->hbm_ecc_data);
7722 		hl_fw_unmask_irq(hdev, event_type);
7723 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7724 		break;
7725 
7726 	case GAUDI_EVENT_TPC0_DEC:
7727 	case GAUDI_EVENT_TPC1_DEC:
7728 	case GAUDI_EVENT_TPC2_DEC:
7729 	case GAUDI_EVENT_TPC3_DEC:
7730 	case GAUDI_EVENT_TPC4_DEC:
7731 	case GAUDI_EVENT_TPC5_DEC:
7732 	case GAUDI_EVENT_TPC6_DEC:
7733 	case GAUDI_EVENT_TPC7_DEC:
		/* On a TPC DEC event, notify on a TPC assertion. While there isn't
		 * a specific event for an assertion yet, the FW generates a TPC DEC
		 * event in that case. The SW upper layer will inspect an internal
		 * mapped area to determine whether the event is a TPC assertion or
		 * a "real" TPC DEC.
		 */
7739 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7740 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7741 		reset_required = gaudi_tpc_read_interrupts(hdev,
7742 					tpc_dec_event_to_tpc_id(event_type),
7743 					"AXI_SLV_DEC_Error");
7744 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7745 		if (reset_required) {
7746 			dev_err(hdev->dev, "reset required due to %s\n",
7747 				gaudi_irq_map_table[event_type].name);
7748 
7749 			reset_direct = true;
7750 			goto reset_device;
7751 		} else {
7752 			hl_fw_unmask_irq(hdev, event_type);
7753 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7754 		}
7755 		break;
7756 
7757 	case GAUDI_EVENT_TPC0_KRN_ERR:
7758 	case GAUDI_EVENT_TPC1_KRN_ERR:
7759 	case GAUDI_EVENT_TPC2_KRN_ERR:
7760 	case GAUDI_EVENT_TPC3_KRN_ERR:
7761 	case GAUDI_EVENT_TPC4_KRN_ERR:
7762 	case GAUDI_EVENT_TPC5_KRN_ERR:
7763 	case GAUDI_EVENT_TPC6_KRN_ERR:
7764 	case GAUDI_EVENT_TPC7_KRN_ERR:
7765 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7766 		reset_required = gaudi_tpc_read_interrupts(hdev,
7767 					tpc_krn_event_to_tpc_id(event_type),
7768 					"KRN_ERR");
7769 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7770 		if (reset_required) {
7771 			dev_err(hdev->dev, "reset required due to %s\n",
7772 				gaudi_irq_map_table[event_type].name);
7773 
7774 			reset_direct = true;
7775 			goto reset_device;
7776 		} else {
7777 			hl_fw_unmask_irq(hdev, event_type);
7778 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7779 		}
7780 		break;
7781 
7782 	case GAUDI_EVENT_PCIE_CORE_SERR:
7783 	case GAUDI_EVENT_PCIE_IF_SERR:
7784 	case GAUDI_EVENT_PCIE_PHY_SERR:
7785 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7786 	case GAUDI_EVENT_MME0_ACC_SERR:
7787 	case GAUDI_EVENT_MME0_SBAB_SERR:
7788 	case GAUDI_EVENT_MME1_ACC_SERR:
7789 	case GAUDI_EVENT_MME1_SBAB_SERR:
7790 	case GAUDI_EVENT_MME2_ACC_SERR:
7791 	case GAUDI_EVENT_MME2_SBAB_SERR:
7792 	case GAUDI_EVENT_MME3_ACC_SERR:
7793 	case GAUDI_EVENT_MME3_SBAB_SERR:
7794 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7795 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7796 	case GAUDI_EVENT_PSOC_MEM_SERR:
7797 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7798 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7799 	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7800 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7801 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7802 		fallthrough;
7803 	case GAUDI_EVENT_MMU_SERR:
7804 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7805 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7806 		hl_fw_unmask_irq(hdev, event_type);
7807 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7808 		break;
7809 
7810 	case GAUDI_EVENT_PCIE_DEC:
7811 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7812 	case GAUDI_EVENT_PSOC_AXI_DEC:
7813 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7814 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7815 		hl_fw_unmask_irq(hdev, event_type);
7816 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7817 		break;
7818 
7819 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7820 	case GAUDI_EVENT_MMU_WR_PERM:
7821 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7822 		hl_fw_unmask_irq(hdev, event_type);
7823 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7824 		break;
7825 
7826 	case GAUDI_EVENT_MME0_WBC_RSP:
7827 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7828 	case GAUDI_EVENT_MME1_WBC_RSP:
7829 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7830 	case GAUDI_EVENT_MME2_WBC_RSP:
7831 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7832 	case GAUDI_EVENT_MME3_WBC_RSP:
7833 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7834 	case GAUDI_EVENT_RAZWI_OR_ADC:
7835 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7836 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7837 		fallthrough;
7838 	case GAUDI_EVENT_NIC0_QM0:
7839 	case GAUDI_EVENT_NIC0_QM1:
7840 	case GAUDI_EVENT_NIC1_QM0:
7841 	case GAUDI_EVENT_NIC1_QM1:
7842 	case GAUDI_EVENT_NIC2_QM0:
7843 	case GAUDI_EVENT_NIC2_QM1:
7844 	case GAUDI_EVENT_NIC3_QM0:
7845 	case GAUDI_EVENT_NIC3_QM1:
7846 	case GAUDI_EVENT_NIC4_QM0:
7847 	case GAUDI_EVENT_NIC4_QM1:
7848 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7849 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7850 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7851 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7852 		hl_fw_unmask_irq(hdev, event_type);
7853 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7854 		break;
7855 
7856 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7857 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7858 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7859 		goto reset_device;
7860 
7861 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7862 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7863 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7864 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7865 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7866 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7867 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7868 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7869 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7870 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7871 		hl_fw_unmask_irq(hdev, event_type);
7872 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7873 		break;
7874 
7875 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7876 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7877 		hl_fw_unmask_irq(hdev, event_type);
7878 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7879 		break;
7880 
7881 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7882 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7883 		gaudi_print_sm_sei_info(hdev, event_type,
7884 					&eq_entry->sm_sei_data);
7885 		rc = hl_state_dump(hdev);
7886 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7887 		if (rc)
7888 			dev_err(hdev->dev,
7889 				"Error during system state dump %d\n", rc);
7890 		hl_fw_unmask_irq(hdev, event_type);
7891 		break;
7892 
7893 	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7894 		break;
7895 
7896 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7897 		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7898 		hl_fw_unmask_irq(hdev, event_type);
7899 		break;
7900 
7901 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7902 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7903 		dev_err(hdev->dev,
7904 			"Received high temp H/W interrupt %d (cause %d)\n",
7905 			event_type, cause);
7906 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7907 		break;
7908 
7909 	case GAUDI_EVENT_DEV_RESET_REQ:
7910 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7911 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7912 		goto reset_device;
7913 
7914 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7915 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7916 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7917 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7918 		goto reset_device;
7919 
7920 	case GAUDI_EVENT_FW_ALIVE_S:
7921 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7922 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7923 		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7924 		fw_err_info.event_id = event_type;
7925 		fw_err_info.event_mask = &event_mask;
7926 		hl_handle_fw_err(hdev, &fw_err_info);
7927 		goto reset_device;
7928 
7929 	default:
7930 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7931 				event_type);
7932 		break;
7933 	}
7934 
7935 	if (event_mask)
7936 		hl_notifier_event_send_all(hdev, event_mask);
7937 
7938 	return;
7939 
7940 reset_device:
7941 	reset_required = true;
7942 
7943 	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7944 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7945 
		/* notify on device unavailable while the reset is triggered by FW */
7947 		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7948 					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7949 	} else if (hdev->hard_reset_on_fw_events) {
7950 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7951 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7952 	} else {
7953 		reset_required = false;
7954 	}
7955 
7956 	if (reset_required) {
7957 		/* escalate general hw errors to critical/fatal error */
7958 		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7959 			hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7960 
7961 		hl_device_cond_reset(hdev, flags, event_mask);
7962 	} else {
7963 		hl_fw_unmask_irq(hdev, event_type);
		/* The event notification must still be sent even though no reset is executed */
7965 		if (event_mask)
7966 			hl_notifier_event_send_all(hdev, event_mask);
7967 	}
7968 }
7969 
7970 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7971 {
7972 	struct gaudi_device *gaudi = hdev->asic_specific;
7973 
7974 	if (aggregate) {
7975 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7976 		return gaudi->events_stat_aggregate;
7977 	}
7978 
7979 	*size = (u32) sizeof(gaudi->events_stat);
7980 	return gaudi->events_stat;
7981 }
7982 
7983 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7984 {
7985 	struct gaudi_device *gaudi = hdev->asic_specific;
7986 	u32 status, timeout_usec;
7987 	int rc;
7988 
7989 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7990 		hdev->reset_info.hard_reset_pending)
7991 		return 0;
7992 
7993 	if (hdev->pldm)
7994 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7995 	else
7996 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7997 
7998 	/* L0 & L1 invalidation */
7999 	WREG32(mmSTLB_INV_PS, 3);
8000 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8001 	WREG32(mmSTLB_INV_PS, 2);
8002 
8003 	rc = hl_poll_timeout(
8004 		hdev,
8005 		mmSTLB_INV_PS,
8006 		status,
8007 		!status,
8008 		1000,
8009 		timeout_usec);
8010 
8011 	WREG32(mmSTLB_INV_SET, 0);
8012 
8013 	return rc;
8014 }
8015 
8016 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8017 						bool is_hard, u32 flags,
8018 						u32 asid, u64 va, u64 size)
8019 {
8020 	/* Treat as invalidate all because there is no range invalidation
8021 	 * in Gaudi
8022 	 */
8023 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8024 }
8025 
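/*
 * Program the hop0 page-table physical address of the given ASID and poll
 * the MMU busy bit until the configuration is accepted.
 */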
8026 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
8027 {
8028 	u32 status, timeout_usec;
8029 	int rc;
8030 
8031 	if (hdev->pldm)
8032 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8033 	else
8034 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8035 
8036 	WREG32(MMU_ASID, asid);
8037 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8038 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8039 	WREG32(MMU_BUSY, 0x80000000);
8040 
8041 	rc = hl_poll_timeout(
8042 		hdev,
8043 		MMU_BUSY,
8044 		status,
8045 		!(status & 0x80000000),
8046 		1000,
8047 		timeout_usec);
8048 
8049 	if (rc) {
8050 		dev_err(hdev->dev,
8051 			"Timeout during MMU hop0 config of asid %d\n", asid);
8052 		return rc;
8053 	}
8054 
8055 	return 0;
8056 }
8057 
8058 static int gaudi_send_heartbeat(struct hl_device *hdev)
8059 {
8060 	struct gaudi_device *gaudi = hdev->asic_specific;
8061 
8062 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8063 		return 0;
8064 
8065 	return hl_fw_send_heartbeat(hdev);
8066 }
8067 
8068 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8069 {
8070 	struct gaudi_device *gaudi = hdev->asic_specific;
8071 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8072 	int rc;
8073 
8074 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8075 		return 0;
8076 
8077 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8078 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8079 					mmCPU_BOOT_ERR1);
8080 	if (rc)
8081 		return rc;
8082 
8083 	if (!strlen(prop->cpucp_info.card_name))
8084 		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8085 				CARD_NAME_MAX_LEN);
8086 
8087 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8088 
8089 	set_default_power_values(hdev);
8090 
8091 	return 0;
8092 }
8093 
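/*
 * Sample the QM/engine status registers of the DMA, TPC, MME and NIC
 * engines. Busy engines are marked in the caller-supplied bitmask and, when
 * 'e' is provided, a human-readable status table is appended to it.
 */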
8094 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8095 		struct engines_data *e)
8096 {
8097 	struct gaudi_device *gaudi = hdev->asic_specific;
8098 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8099 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8100 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8101 	unsigned long *mask = (unsigned long *)mask_arr;
8102 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8103 	bool is_idle = true, is_eng_idle, is_slave;
8104 	u64 offset;
8105 	int i, dma_id, port;
8106 
8107 	if (e)
8108 		hl_engine_data_sprintf(e,
8109 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8110 			"---  -------  ------------  ----------  -------------\n");
8111 
8112 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8113 		dma_id = gaudi_dma_assignment[i];
8114 		offset = dma_id * DMA_QMAN_OFFSET;
8115 
8116 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8117 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8118 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8119 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8120 				IS_DMA_IDLE(dma_core_sts0);
8121 		is_idle &= is_eng_idle;
8122 
8123 		if (mask && !is_eng_idle)
8124 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8125 		if (e)
8126 			hl_engine_data_sprintf(e, fmt, dma_id,
8127 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8128 				qm_cgm_sts, dma_core_sts0);
8129 	}
8130 
8131 	if (e)
8132 		hl_engine_data_sprintf(e,
8133 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8134 			"---  -------  ------------  ----------  ----------\n");
8135 
8136 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8137 		offset = i * TPC_QMAN_OFFSET;
8138 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8139 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8140 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8141 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8142 				IS_TPC_IDLE(tpc_cfg_sts);
8143 		is_idle &= is_eng_idle;
8144 
8145 		if (mask && !is_eng_idle)
8146 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8147 		if (e)
8148 			hl_engine_data_sprintf(e, fmt, i,
8149 				is_eng_idle ? "Y" : "N",
8150 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8151 	}
8152 
8153 	if (e)
8154 		hl_engine_data_sprintf(e,
8155 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8156 			"---  -------  ------------  ----------  -----------\n");
8157 
8158 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8159 		offset = i * MME_QMAN_OFFSET;
8160 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8161 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8162 
8163 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8164 		is_slave = i % 2;
8165 		if (!is_slave) {
8166 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8167 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8168 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8169 		}
8170 
8171 		is_idle &= is_eng_idle;
8172 
8173 		if (mask && !is_eng_idle)
8174 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8175 		if (e) {
8176 			if (!is_slave)
8177 				hl_engine_data_sprintf(e, fmt, i,
8178 					is_eng_idle ? "Y" : "N",
8179 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8180 			else
8181 				hl_engine_data_sprintf(e, mme_slave_fmt, i,
8182 					is_eng_idle ? "Y" : "N", "-",
8183 					"-", mme_arch_sts);
8184 		}
8185 	}
8186 
8187 	if (e)
8188 		hl_engine_data_sprintf(e,
8189 				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8190 				"---  -------  ------------  ----------\n");
8191 
8192 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8193 		offset = i * NIC_MACRO_QMAN_OFFSET;
8194 		port = 2 * i;
8195 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8196 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8197 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8198 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8199 			is_idle &= is_eng_idle;
8200 
8201 			if (mask && !is_eng_idle)
8202 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8203 			if (e)
8204 				hl_engine_data_sprintf(e, nic_fmt, port,
8205 						is_eng_idle ? "Y" : "N",
8206 						qm_glbl_sts0, qm_cgm_sts);
8207 		}
8208 
8209 		port = 2 * i + 1;
8210 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8211 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8212 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8213 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8214 			is_idle &= is_eng_idle;
8215 
8216 			if (mask && !is_eng_idle)
8217 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8218 			if (e)
8219 				hl_engine_data_sprintf(e, nic_fmt, port,
8220 						is_eng_idle ? "Y" : "N",
8221 						qm_glbl_sts0, qm_cgm_sts);
8222 		}
8223 	}
8224 
8225 	if (e)
8226 		hl_engine_data_sprintf(e, "\n");
8227 
8228 	return is_idle;
8229 }
8230 
8231 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8232 	__acquires(&gaudi->hw_queues_lock)
8233 {
8234 	struct gaudi_device *gaudi = hdev->asic_specific;
8235 
8236 	spin_lock(&gaudi->hw_queues_lock);
8237 }
8238 
8239 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8240 	__releases(&gaudi->hw_queues_lock)
8241 {
8242 	struct gaudi_device *gaudi = hdev->asic_specific;
8243 
8244 	spin_unlock(&gaudi->hw_queues_lock);
8245 }
8246 
8247 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8248 {
8249 	return hdev->pdev->device;
8250 }
8251 
8252 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8253 				size_t max_size)
8254 {
8255 	struct gaudi_device *gaudi = hdev->asic_specific;
8256 
8257 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8258 		return 0;
8259 
8260 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8261 }
8262 
8263 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8264 {
8265 	struct gaudi_device *gaudi = hdev->asic_specific;
8266 
8267 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8268 		return 0;
8269 
8270 	return hl_fw_get_monitor_dump(hdev, data);
8271 }
8272 
8273 /*
8274  * this function should be used only during initialization and/or after reset,
8275  * when there are no active users.
8276  */
8277 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8278 {
8279 	u64 kernel_timeout;
8280 	u32 status, offset;
8281 	int rc;
8282 
8283 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8284 
8285 	if (hdev->pldm)
8286 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8287 	else
8288 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8289 
8290 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8291 			lower_32_bits(tpc_kernel));
8292 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8293 			upper_32_bits(tpc_kernel));
8294 
8295 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8296 			lower_32_bits(tpc_kernel));
8297 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8298 			upper_32_bits(tpc_kernel));
8299 	/* set a valid LUT pointer, content is of no significance */
8300 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8301 			lower_32_bits(tpc_kernel));
8302 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8303 			upper_32_bits(tpc_kernel));
8304 
8305 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8306 			lower_32_bits(CFG_BASE +
8307 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8308 
8309 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8310 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8311 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8312 	/* wait a bit for the engine to start executing */
8313 	usleep_range(1000, 1500);
8314 
8315 	/* wait until engine has finished executing */
8316 	rc = hl_poll_timeout(
8317 		hdev,
8318 		mmTPC0_CFG_STATUS + offset,
8319 		status,
8320 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8321 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8322 		1000,
8323 		kernel_timeout);
8324 
8325 	if (rc) {
8326 		dev_err(hdev->dev,
8327 			"Timeout while waiting for TPC%d icache prefetch\n",
8328 			tpc_id);
8329 		return -EIO;
8330 	}
8331 
8332 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8333 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8334 
8335 	/* wait a bit for the engine to start executing */
8336 	usleep_range(1000, 1500);
8337 
8338 	/* wait until engine has finished executing */
8339 	rc = hl_poll_timeout(
8340 		hdev,
8341 		mmTPC0_CFG_STATUS + offset,
8342 		status,
8343 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8344 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8345 		1000,
8346 		kernel_timeout);
8347 
8348 	if (rc) {
8349 		dev_err(hdev->dev,
8350 			"Timeout while waiting for TPC%d vector pipe\n",
8351 			tpc_id);
8352 		return -EIO;
8353 	}
8354 
8355 	rc = hl_poll_timeout(
8356 		hdev,
8357 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8358 		status,
8359 		(status == 0),
8360 		1000,
8361 		kernel_timeout);
8362 
8363 	if (rc) {
8364 		dev_err(hdev->dev,
8365 			"Timeout while waiting for TPC%d kernel to execute\n",
8366 			tpc_id);
8367 		return -EIO;
8368 	}
8369 
8370 	return 0;
8371 }
8372 
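/*
 * Per-context setup of the internal CB pool: allocate a DMA-coherent buffer,
 * expose it through a gen_pool whose minimum allocation order fits one
 * collective CB (five MSG_SHORT packets plus a fence packet), then reserve a
 * host VA block and map the buffer through the device MMU.
 */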
8373 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8374 		struct hl_ctx *ctx)
8375 {
8376 	struct gaudi_device *gaudi = hdev->asic_specific;
8377 	int min_alloc_order, rc, collective_cb_size;
8378 
8379 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8380 		return 0;
8381 
8382 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8383 							HOST_SPACE_INTERNAL_CB_SZ,
8384 							&hdev->internal_cb_pool_dma_addr,
8385 							GFP_KERNEL | __GFP_ZERO);
8386 
8387 	if (!hdev->internal_cb_pool_virt_addr)
8388 		return -ENOMEM;
8389 
8390 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8391 			sizeof(struct packet_fence);
8392 	min_alloc_order = ilog2(collective_cb_size);
8393 
8394 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8395 	if (!hdev->internal_cb_pool) {
8396 		dev_err(hdev->dev,
8397 			"Failed to create internal CB pool\n");
8398 		rc = -ENOMEM;
8399 		goto free_internal_cb_pool;
8400 	}
8401 
8402 	rc = gen_pool_add(hdev->internal_cb_pool,
8403 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8404 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8405 	if (rc) {
8406 		dev_err(hdev->dev,
8407 			"Failed to add memory to internal CB pool\n");
8408 		rc = -EFAULT;
8409 		goto destroy_internal_cb_pool;
8410 	}
8411 
8412 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8413 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8414 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8415 
8416 	if (!hdev->internal_cb_va_base) {
8417 		rc = -ENOMEM;
8418 		goto destroy_internal_cb_pool;
8419 	}
8420 
8421 	mutex_lock(&hdev->mmu_lock);
8422 
8423 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8424 			hdev->internal_cb_pool_dma_addr,
8425 			HOST_SPACE_INTERNAL_CB_SZ);
8426 	if (rc)
8427 		goto unreserve_internal_cb_pool;
8428 
8429 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8430 	if (rc)
8431 		goto unmap_internal_cb_pool;
8432 
8433 	mutex_unlock(&hdev->mmu_lock);
8434 
8435 	return 0;
8436 
8437 unmap_internal_cb_pool:
8438 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8439 			HOST_SPACE_INTERNAL_CB_SZ);
8440 unreserve_internal_cb_pool:
8441 	mutex_unlock(&hdev->mmu_lock);
8442 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8443 			HOST_SPACE_INTERNAL_CB_SZ);
8444 destroy_internal_cb_pool:
8445 	gen_pool_destroy(hdev->internal_cb_pool);
8446 free_internal_cb_pool:
8447 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8448 					hdev->internal_cb_pool_dma_addr);
8449 
8450 	return rc;
8451 }
8452 
8453 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8454 		struct hl_ctx *ctx)
8455 {
8456 	struct gaudi_device *gaudi = hdev->asic_specific;
8457 
8458 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8459 		return;
8460 
8461 	mutex_lock(&hdev->mmu_lock);
8462 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8463 			HOST_SPACE_INTERNAL_CB_SZ);
8464 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8465 			HOST_SPACE_INTERNAL_CB_SZ);
8466 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8467 	mutex_unlock(&hdev->mmu_lock);
8468 
8469 	gen_pool_destroy(hdev->internal_cb_pool);
8470 
8471 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8472 					hdev->internal_cb_pool_dma_addr);
8473 }
8474 
8475 static int gaudi_ctx_init(struct hl_ctx *ctx)
8476 {
8477 	int rc;
8478 
8479 	if (ctx->asid == HL_KERNEL_ASID_ID)
8480 		return 0;
8481 
8482 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8483 	if (rc)
8484 		return rc;
8485 
8486 	rc = gaudi_restore_user_registers(ctx->hdev);
8487 	if (rc)
8488 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8489 
8490 	return rc;
8491 }
8492 
8493 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8494 {
8495 	if (ctx->asid == HL_KERNEL_ASID_ID)
8496 		return;
8497 
8498 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8499 }
8500 
8501 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8502 {
8503 	return 0;
8504 }
8505 
8506 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8507 {
8508 	return gaudi_cq_assignment[cq_idx];
8509 }
8510 
8511 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8512 {
8513 	return sizeof(struct packet_msg_short) +
8514 			sizeof(struct packet_msg_prot) * 2;
8515 }
8516 
8517 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8518 {
8519 	return sizeof(struct packet_msg_short) * 4 +
8520 			sizeof(struct packet_fence) +
8521 			sizeof(struct packet_msg_prot) * 2;
8522 }
8523 
8524 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8525 {
8526 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8527 }
8528 
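/*
 * Append a single MSG_SHORT packet that increments the given sync object by 1
 * (ADD mode) in the W_S sync manager. Returns the updated CB size.
 */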
8529 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8530 				u32 size, bool eb)
8531 {
8532 	struct hl_cb *cb = (struct hl_cb *) data;
8533 	struct packet_msg_short *pkt;
8534 	u32 value, ctl, pkt_size = sizeof(*pkt);
8535 
8536 	pkt = cb->kernel_address + size;
8537 	memset(pkt, 0, pkt_size);
8538 
8539 	/* Inc by 1, Mode ADD */
8540 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8541 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8542 
8543 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8544 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8545 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8546 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8547 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8548 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8549 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8550 
8551 	pkt->value = cpu_to_le32(value);
8552 	pkt->ctl = cpu_to_le32(ctl);
8553 
8554 	return size + pkt_size;
8555 }
8556 
8557 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8558 					u16 addr)
8559 {
8560 	u32 ctl, pkt_size = sizeof(*pkt);
8561 
8562 	memset(pkt, 0, pkt_size);
8563 
8564 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8565 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8566 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8567 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8568 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8569 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* MB only on the last pkt */
8570 
8571 	pkt->value = cpu_to_le32(value);
8572 	pkt->ctl = cpu_to_le32(ctl);
8573 
8574 	return pkt_size;
8575 }
8576 
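/*
 * Build a MSG_SHORT packet that arms monitor 'mon_id' on the sync object
 * group of 'sob_base' with the given mask, to fire once the group value is
 * greater than or equal to 'sob_val'. Returns the packet size, or 0 for an
 * invalid SOB base/mask combination.
 */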
8577 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8578 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8579 		u16 sob_val, u16 mon_id)
8580 {
8581 	u64 monitor_base;
8582 	u32 ctl, value, pkt_size = sizeof(*pkt);
8583 	u16 msg_addr_offset;
8584 	u8 mask;
8585 
8586 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8587 		dev_err(hdev->dev,
8588 			"sob_base %u (mask %#x) is not valid\n",
8589 			sob_base, sob_mask);
8590 		return 0;
8591 	}
8592 
8593 	/*
8594 	 * monitor_base should hold the content of the base0 address register,
8595 	 * since the HW adds it to the MSG_SHORT address offsets
8596 	 */
8597 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8598 
8599 	msg_addr_offset =
8600 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8601 				monitor_base;
8602 
8603 	memset(pkt, 0, pkt_size);
8604 
8605 	/* Monitor config packet: bind the monitor to a sync object */
8606 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8607 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8608 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8609 			0); /* GREATER OR EQUAL */
8610 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8611 
8612 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8613 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8614 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8615 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8616 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8617 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8618 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8619 
8620 	pkt->value = cpu_to_le32(value);
8621 	pkt->ctl = cpu_to_le32(ctl);
8622 
8623 	return pkt_size;
8624 }
8625 
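/*
 * Build a FENCE packet that waits on fence ID 2 for a target value of 1 and
 * decrements it by 1 once reached.
 */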
8626 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8627 {
8628 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8629 
8630 	memset(pkt, 0, pkt_size);
8631 
8632 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8633 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8634 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8635 
8636 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8637 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8638 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8639 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8640 
8641 	pkt->cfg = cpu_to_le32(cfg);
8642 	pkt->ctl = cpu_to_le32(ctl);
8643 
8644 	return pkt_size;
8645 }
8646 
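/*
 * Translate a queue ID to the configuration-space address of its CP FENCE2
 * RDATA register. This address is used as the armed monitor's payload target
 * so that writing it releases the FENCE packet of the wait CB.
 */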
8647 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8648 {
8649 	u32 offset, nic_index;
8650 
8651 	switch (queue_id) {
8652 	case GAUDI_QUEUE_ID_DMA_0_0:
8653 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8654 		break;
8655 	case GAUDI_QUEUE_ID_DMA_0_1:
8656 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8657 		break;
8658 	case GAUDI_QUEUE_ID_DMA_0_2:
8659 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8660 		break;
8661 	case GAUDI_QUEUE_ID_DMA_0_3:
8662 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8663 		break;
8664 	case GAUDI_QUEUE_ID_DMA_1_0:
8665 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8666 		break;
8667 	case GAUDI_QUEUE_ID_DMA_1_1:
8668 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8669 		break;
8670 	case GAUDI_QUEUE_ID_DMA_1_2:
8671 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8672 		break;
8673 	case GAUDI_QUEUE_ID_DMA_1_3:
8674 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8675 		break;
8676 	case GAUDI_QUEUE_ID_DMA_5_0:
8677 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8678 		break;
8679 	case GAUDI_QUEUE_ID_DMA_5_1:
8680 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8681 		break;
8682 	case GAUDI_QUEUE_ID_DMA_5_2:
8683 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8684 		break;
8685 	case GAUDI_QUEUE_ID_DMA_5_3:
8686 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8687 		break;
8688 	case GAUDI_QUEUE_ID_TPC_7_0:
8689 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8690 		break;
8691 	case GAUDI_QUEUE_ID_TPC_7_1:
8692 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8693 		break;
8694 	case GAUDI_QUEUE_ID_TPC_7_2:
8695 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8696 		break;
8697 	case GAUDI_QUEUE_ID_TPC_7_3:
8698 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8699 		break;
8700 	case GAUDI_QUEUE_ID_NIC_0_0:
8701 	case GAUDI_QUEUE_ID_NIC_1_0:
8702 	case GAUDI_QUEUE_ID_NIC_2_0:
8703 	case GAUDI_QUEUE_ID_NIC_3_0:
8704 	case GAUDI_QUEUE_ID_NIC_4_0:
8705 	case GAUDI_QUEUE_ID_NIC_5_0:
8706 	case GAUDI_QUEUE_ID_NIC_6_0:
8707 	case GAUDI_QUEUE_ID_NIC_7_0:
8708 	case GAUDI_QUEUE_ID_NIC_8_0:
8709 	case GAUDI_QUEUE_ID_NIC_9_0:
8710 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8711 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8712 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8713 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8714 		break;
8715 	case GAUDI_QUEUE_ID_NIC_0_1:
8716 	case GAUDI_QUEUE_ID_NIC_1_1:
8717 	case GAUDI_QUEUE_ID_NIC_2_1:
8718 	case GAUDI_QUEUE_ID_NIC_3_1:
8719 	case GAUDI_QUEUE_ID_NIC_4_1:
8720 	case GAUDI_QUEUE_ID_NIC_5_1:
8721 	case GAUDI_QUEUE_ID_NIC_6_1:
8722 	case GAUDI_QUEUE_ID_NIC_7_1:
8723 	case GAUDI_QUEUE_ID_NIC_8_1:
8724 	case GAUDI_QUEUE_ID_NIC_9_1:
8725 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8726 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8727 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8728 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8729 		break;
8730 	case GAUDI_QUEUE_ID_NIC_0_2:
8731 	case GAUDI_QUEUE_ID_NIC_1_2:
8732 	case GAUDI_QUEUE_ID_NIC_2_2:
8733 	case GAUDI_QUEUE_ID_NIC_3_2:
8734 	case GAUDI_QUEUE_ID_NIC_4_2:
8735 	case GAUDI_QUEUE_ID_NIC_5_2:
8736 	case GAUDI_QUEUE_ID_NIC_6_2:
8737 	case GAUDI_QUEUE_ID_NIC_7_2:
8738 	case GAUDI_QUEUE_ID_NIC_8_2:
8739 	case GAUDI_QUEUE_ID_NIC_9_2:
8740 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8741 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8742 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8743 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8744 		break;
8745 	case GAUDI_QUEUE_ID_NIC_0_3:
8746 	case GAUDI_QUEUE_ID_NIC_1_3:
8747 	case GAUDI_QUEUE_ID_NIC_2_3:
8748 	case GAUDI_QUEUE_ID_NIC_3_3:
8749 	case GAUDI_QUEUE_ID_NIC_4_3:
8750 	case GAUDI_QUEUE_ID_NIC_5_3:
8751 	case GAUDI_QUEUE_ID_NIC_6_3:
8752 	case GAUDI_QUEUE_ID_NIC_7_3:
8753 	case GAUDI_QUEUE_ID_NIC_8_3:
8754 	case GAUDI_QUEUE_ID_NIC_9_3:
8755 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8756 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8757 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8758 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8759 		break;
8760 	default:
8761 		return -EINVAL;
8762 	}
8763 
8764 	*addr = CFG_BASE + offset;
8765 
8766 	return 0;
8767 }
8768 
8769 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8770 {
8771 	u64 monitor_base;
8772 	u32 size = 0;
8773 	u16 msg_addr_offset;
8774 
8775 	/*
8776 	 * monitor_base should hold the content of the base0 address register,
8777 	 * since the HW adds it to the MSG_SHORT address offsets
8778 	 */
8779 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8780 
8781 	/* First monitor config packet: low address of the sync */
8782 	msg_addr_offset =
8783 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8784 				monitor_base;
8785 
8786 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8787 					msg_addr_offset);
8788 
8789 	/* Second monitor config packet: high address of the sync */
8790 	msg_addr_offset =
8791 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8792 				monitor_base;
8793 
8794 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8795 					msg_addr_offset);
8796 
8797 	/*
8798 	 * Third monitor config packet: the payload, i.e. what to write when the
8799 	 * sync triggers
8800 	 */
8801 	msg_addr_offset =
8802 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8803 				monitor_base;
8804 
8805 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8806 
8807 	return size;
8808 }
8809 
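/*
 * Compose a wait CB: three monitor-setup packets (payload address low/high
 * and payload data), a monitor-arm packet and a FENCE packet. Returns the
 * updated CB size, or 0 if the queue ID has no fence address.
 */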
8810 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8811 				struct hl_gen_wait_properties *prop)
8812 {
8813 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8814 	void *buf = cb->kernel_address;
8815 	u64 fence_addr = 0;
8816 	u32 size = prop->size;
8817 
8818 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8819 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8820 				prop->q_idx);
8821 		return 0;
8822 	}
8823 
8824 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8825 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8826 			prop->sob_mask, prop->sob_val, prop->mon_id);
8827 	size += gaudi_add_fence_pkt(buf + size);
8828 
8829 	return size;
8830 }
8831 
8832 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8833 {
8834 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8835 
8836 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8837 		hw_sob->sob_id);
8838 
8839 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8840 			hw_sob->sob_id * 4, 0);
8841 
8842 	kref_init(&hw_sob->kref);
8843 }
8844 
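/*
 * Compose the 64-bit device timestamp from the upper and lower 32-bit PSOC
 * timestamp counter registers.
 */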
8845 static u64 gaudi_get_device_time(struct hl_device *hdev)
8846 {
8847 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8848 
8849 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8850 }
8851 
8852 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8853 				u32 *block_size, u32 *block_id)
8854 {
8855 	return -EPERM;
8856 }
8857 
8858 static int gaudi_block_mmap(struct hl_device *hdev,
8859 				struct vm_area_struct *vma,
8860 				u32 block_id, u32 block_size)
8861 {
8862 	return -EPERM;
8863 }
8864 
8865 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8866 {
8867 	struct cpu_dyn_regs *dyn_regs =
8868 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8869 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8870 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8871 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8872 
8873 	WREG32(irq_handler_offset,
8874 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8875 }
8876 
8877 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8878 {
8879 	return -EINVAL;
8880 }
8881 
8882 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8883 {
8884 	switch (pll_idx) {
8885 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8886 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8887 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8888 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8889 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8890 	case HL_GAUDI_MME_PLL: return MME_PLL;
8891 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8892 	case HL_GAUDI_IF_PLL: return IF_PLL;
8893 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8894 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8895 	default: return -EINVAL;
8896 	}
8897 }
8898 
8899 static int gaudi_add_sync_to_engine_map_entry(
8900 	struct hl_sync_to_engine_map *map, u32 reg_value,
8901 	enum hl_sync_engine_type engine_type, u32 engine_id)
8902 {
8903 	struct hl_sync_to_engine_map_entry *entry;
8904 
8905 	/* The register value holds a partial address of the sync object and
8906 	 * is used as its unique identifier, so strip the lower CFG base bits
8907 	 * from the value.
8908 	 */
8909 	if (reg_value == 0 || reg_value == 0xffffffff)
8910 		return 0;
8911 	reg_value -= lower_32_bits(CFG_BASE);
8912 
8913 	/* create a new hash entry */
8914 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8915 	if (!entry)
8916 		return -ENOMEM;
8917 	entry->engine_type = engine_type;
8918 	entry->engine_id = engine_id;
8919 	entry->sync_id = reg_value;
8920 	hash_add(map->tb, &entry->node, reg_value);
8921 
8922 	return 0;
8923 }
8924 
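/*
 * For the state dump, read the sync object configuration register of every
 * TPC, MME and DMA engine and record the mapping from that sync object to
 * the engine that signals it.
 */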
8925 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8926 				struct hl_sync_to_engine_map *map)
8927 {
8928 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8929 	int i, j, rc;
8930 	u32 reg_value;
8931 
8932 	/* Iterate over TPC engines */
8933 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8934 
8935 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8936 					sds->props[SP_NEXT_TPC] * i);
8937 
8938 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8939 							ENGINE_TPC, i);
8940 		if (rc)
8941 			goto free_sync_to_engine_map;
8942 	}
8943 
8944 	/* Iterate over MME engines */
8945 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8946 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8947 
8948 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8949 						sds->props[SP_NEXT_MME] * i +
8950 						j * sizeof(u32));
8951 
8952 			rc = gaudi_add_sync_to_engine_map_entry(
8953 				map, reg_value, ENGINE_MME,
8954 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8955 			if (rc)
8956 				goto free_sync_to_engine_map;
8957 		}
8958 	}
8959 
8960 	/* Iterate over DMA engines */
8961 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8962 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8963 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8964 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8965 							ENGINE_DMA, i);
8966 		if (rc)
8967 			goto free_sync_to_engine_map;
8968 	}
8969 
8970 	return 0;
8971 
8972 free_sync_to_engine_map:
8973 	hl_state_dump_free_sync_to_engine_map(map);
8974 
8975 	return rc;
8976 }
8977 
8978 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8979 {
8980 	return FIELD_GET(
8981 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8982 		mon->status);
8983 }
8984 
8985 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8986 {
8987 	const size_t max_write = 10;
8988 	u32 gid, mask, sob;
8989 	int i, offset;
8990 
8991 	/* The monitored sync object IDs are derived from the group ID and the
8992 	 * cleared bits in the mask: sob = group_id * MONITOR_MAX_SOBS + bit index
8993 	 */
8994 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8995 			mon->arm_data);
8996 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8997 			mon->arm_data);
8998 
8999 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9000 		max_write; mask >>= 1, i++) {
9001 		if (!(mask & 1)) {
9002 			sob = gid * MONITOR_MAX_SOBS + i;
9003 
9004 			if (offset > 0)
9005 				offset += snprintf(sobs + offset, max_write,
9006 							", ");
9007 
9008 			offset += snprintf(sobs + offset, max_write, "%u", sob);
9009 		}
9010 	}
9011 }
9012 
9013 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9014 				struct hl_device *hdev,
9015 				struct hl_mon_state_dump *mon)
9016 {
9017 	const char *name;
9018 	char scratch_buf1[BIN_REG_STRING_SIZE],
9019 		scratch_buf2[BIN_REG_STRING_SIZE];
9020 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9021 
9022 	name = hl_state_dump_get_monitor_name(hdev, mon);
9023 	if (!name)
9024 		name = "";
9025 
9026 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9027 
9028 	return hl_snprintf_resize(
9029 		buf, size, offset,
9030 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9031 		mon->id, name,
9032 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9033 				mon->arm_data),
9034 		hl_format_as_binary(
9035 			scratch_buf1, sizeof(scratch_buf1),
9036 			FIELD_GET(
9037 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9038 				mon->arm_data)),
9039 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9040 				mon->arm_data),
9041 		mon->wr_data,
9042 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9043 		hl_format_as_binary(
9044 			scratch_buf2, sizeof(scratch_buf2),
9045 			FIELD_GET(
9046 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9047 				mon->status)),
9048 		monitored_sobs);
9049 }
9050 
9051 
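/*
 * Dump the fence state of a single engine: read the per-stream CP status and
 * fence counter registers and append a line for every stream that has a
 * fence in progress.
 */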
9052 static int gaudi_print_fences_single_engine(
9053 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9054 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9055 	size_t *size, size_t *offset)
9056 {
9057 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9058 	int rc = -ENOMEM, i;
9059 	u32 *statuses, *fences;
9060 
9061 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9062 			sizeof(*statuses), GFP_KERNEL);
9063 	if (!statuses)
9064 		goto out;
9065 
9066 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9067 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
9068 			 sizeof(*fences), GFP_KERNEL);
9069 	if (!fences)
9070 		goto free_status;
9071 
9072 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9073 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9074 
9075 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9076 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9077 		fences[i] = RREG32(base_offset + i * sizeof(u32));
9078 
9079 	/* The actual print */
9080 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9081 		u32 fence_id;
9082 		u64 fence_cnt, fence_rdata;
9083 		const char *engine_name;
9084 
9085 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9086 			statuses[i]))
9087 			continue;
9088 
9089 		fence_id =
9090 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9091 		fence_cnt = base_offset + CFG_BASE +
9092 			sizeof(u32) *
9093 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9094 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9095 				sds->props[SP_FENCE0_RDATA_OFFSET];
9096 		engine_name = hl_sync_engine_to_string(engine_type);
9097 
9098 		rc = hl_snprintf_resize(
9099 			buf, size, offset,
9100 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9101 			engine_name, engine_id,
9102 			i, fence_id,
9103 			fence_cnt, engine_name, engine_id, fence_id, i,
9104 			fence_rdata, engine_name, engine_id, fence_id, i,
9105 			fences[fence_id],
9106 			statuses[i]);
9107 		if (rc)
9108 			goto free_fences;
9109 	}
9110 
9111 	rc = 0;
9112 
9113 free_fences:
9114 	kfree(fences);
9115 free_status:
9116 	kfree(statuses);
9117 out:
9118 	return rc;
9119 }
9120 
9121 
9122 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9123 	.monitor_valid = gaudi_monitor_valid,
9124 	.print_single_monitor = gaudi_print_single_monitor,
9125 	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9126 	.print_fences_single_engine = gaudi_print_fences_single_engine,
9127 };
9128 
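/*
 * Populate the state-dump hash tables that translate sync object and monitor
 * IDs to names, and register the Gaudi-specific state-dump properties and
 * callbacks.
 */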
9129 static void gaudi_state_dump_init(struct hl_device *hdev)
9130 {
9131 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9132 	int i;
9133 
9134 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9135 		hash_add(sds->so_id_to_str_tb,
9136 			&gaudi_so_id_to_str[i].node,
9137 			gaudi_so_id_to_str[i].id);
9138 
9139 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9140 		hash_add(sds->monitor_id_to_str_tb,
9141 			&gaudi_monitor_id_to_str[i].node,
9142 			gaudi_monitor_id_to_str[i].id);
9143 
9144 	sds->props = gaudi_state_dump_specs_props;
9145 
9146 	sds->sync_namager_names = gaudi_sync_manager_names;
9147 
9148 	sds->funcs = gaudi_state_dump_funcs;
9149 }
9150 
9151 static u32 *gaudi_get_stream_master_qid_arr(void)
9152 {
9153 	return gaudi_stream_master;
9154 }
9155 
9156 static int gaudi_set_dram_properties(struct hl_device *hdev)
9157 {
9158 	return 0;
9159 }
9160 
9161 static int gaudi_set_binning_masks(struct hl_device *hdev)
9162 {
9163 	return 0;
9164 }
9165 
9166 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9167 {
9168 }
9169 
9170 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9171 {
9172 	struct hl_device *hdev = dev_get_drvdata(dev);
9173 	struct cpucp_info *cpucp_info;
9174 
9175 	cpucp_info = &hdev->asic_prop.cpucp_info;
9176 
9177 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9178 }
9179 
9180 static DEVICE_ATTR_RO(infineon_ver);
9181 
9182 static struct attribute *gaudi_vrm_dev_attrs[] = {
9183 	&dev_attr_infineon_ver.attr,
9184 	NULL,
9185 };
9186 
9187 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9188 					struct attribute_group *dev_vrm_attr_grp)
9189 {
9190 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9191 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9192 }
9193 
9194 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9195 {
9196 	return 0;
9197 }
9198 
9199 static const struct hl_asic_funcs gaudi_funcs = {
9200 	.early_init = gaudi_early_init,
9201 	.early_fini = gaudi_early_fini,
9202 	.late_init = gaudi_late_init,
9203 	.late_fini = gaudi_late_fini,
9204 	.sw_init = gaudi_sw_init,
9205 	.sw_fini = gaudi_sw_fini,
9206 	.hw_init = gaudi_hw_init,
9207 	.hw_fini = gaudi_hw_fini,
9208 	.halt_engines = gaudi_halt_engines,
9209 	.suspend = gaudi_suspend,
9210 	.resume = gaudi_resume,
9211 	.mmap = gaudi_mmap,
9212 	.ring_doorbell = gaudi_ring_doorbell,
9213 	.pqe_write = gaudi_pqe_write,
9214 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9215 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9216 	.scrub_device_mem = gaudi_scrub_device_mem,
9217 	.scrub_device_dram = gaudi_scrub_device_dram,
9218 	.get_int_queue_base = gaudi_get_int_queue_base,
9219 	.test_queues = gaudi_test_queues,
9220 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9221 	.asic_dma_pool_free = gaudi_dma_pool_free,
9222 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9223 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9224 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9225 	.cs_parser = gaudi_cs_parser,
9226 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
9227 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9228 	.update_eq_ci = gaudi_update_eq_ci,
9229 	.context_switch = gaudi_context_switch,
9230 	.restore_phase_topology = gaudi_restore_phase_topology,
9231 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9232 	.add_device_attr = gaudi_add_device_attr,
9233 	.handle_eqe = gaudi_handle_eqe,
9234 	.get_events_stat = gaudi_get_events_stat,
9235 	.read_pte = gaudi_read_pte,
9236 	.write_pte = gaudi_write_pte,
9237 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9238 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9239 	.mmu_prefetch_cache_range = NULL,
9240 	.send_heartbeat = gaudi_send_heartbeat,
9241 	.debug_coresight = gaudi_debug_coresight,
9242 	.is_device_idle = gaudi_is_device_idle,
9243 	.compute_reset_late_init = gaudi_compute_reset_late_init,
9244 	.hw_queues_lock = gaudi_hw_queues_lock,
9245 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9246 	.get_pci_id = gaudi_get_pci_id,
9247 	.get_eeprom_data = gaudi_get_eeprom_data,
9248 	.get_monitor_dump = gaudi_get_monitor_dump,
9249 	.send_cpu_message = gaudi_send_cpu_message,
9250 	.pci_bars_map = gaudi_pci_bars_map,
9251 	.init_iatu = gaudi_init_iatu,
9252 	.rreg = hl_rreg,
9253 	.wreg = hl_wreg,
9254 	.halt_coresight = gaudi_halt_coresight,
9255 	.ctx_init = gaudi_ctx_init,
9256 	.ctx_fini = gaudi_ctx_fini,
9257 	.pre_schedule_cs = gaudi_pre_schedule_cs,
9258 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9259 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9260 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9261 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9262 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9263 	.gen_signal_cb = gaudi_gen_signal_cb,
9264 	.gen_wait_cb = gaudi_gen_wait_cb,
9265 	.reset_sob = gaudi_reset_sob,
9266 	.reset_sob_group = gaudi_reset_sob_group,
9267 	.get_device_time = gaudi_get_device_time,
9268 	.pb_print_security_errors = NULL,
9269 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9270 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9271 	.get_dec_base_addr = NULL,
9272 	.scramble_addr = hl_mmu_scramble_addr,
9273 	.descramble_addr = hl_mmu_descramble_addr,
9274 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9275 	.get_hw_block_id = gaudi_get_hw_block_id,
9276 	.hw_block_mmap = gaudi_block_mmap,
9277 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9278 	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9279 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9280 	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9281 	.init_firmware_loader = gaudi_init_firmware_loader,
9282 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9283 	.state_dump_init = gaudi_state_dump_init,
9284 	.get_sob_addr = gaudi_get_sob_addr,
9285 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9286 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9287 	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9288 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9289 	.access_dev_mem = hl_access_dev_mem,
9290 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9291 	.send_device_activity = gaudi_send_device_activity,
9292 	.set_dram_properties = gaudi_set_dram_properties,
9293 	.set_binning_masks = gaudi_set_binning_masks,
9294 };
9295 
9296 /**
9297  * gaudi_set_asic_funcs - set GAUDI function pointers
9298  *
9299  * @hdev: pointer to hl_device structure
9300  *
9301  */
9302 void gaudi_set_asic_funcs(struct hl_device *hdev)
9303 {
9304 	hdev->asic_funcs = &gaudi_funcs;
9305 }
9306