1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 * - Range registers
28 * - MMU
29 *
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 * - Range registers
35 * - Protection bits
36 *
37 * MMU is always enabled.
38 *
 * QMAN DMA channels 0,1 (PCI DMA):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is never
 *                      secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
53 * idle)
54 * - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
61
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN 20
86
87 #define GAUDI_CB_POOL_CB_CNT 512
88 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
97
98 #define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
99
100 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
101
102 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
103
104 #define MONITOR_SOB_STRING_SIZE 256
105
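/* Queue IDs of the stream master QMANs: all four streams of PCI DMA 0 and 1 */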
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107 GAUDI_QUEUE_ID_DMA_0_0,
108 GAUDI_QUEUE_ID_DMA_0_1,
109 GAUDI_QUEUE_ID_DMA_0_2,
110 GAUDI_QUEUE_ID_DMA_0_3,
111 GAUDI_QUEUE_ID_DMA_1_0,
112 GAUDI_QUEUE_ID_DMA_1_1,
113 GAUDI_QUEUE_ID_DMA_1_2,
114 GAUDI_QUEUE_ID_DMA_1_3
115 };
116
117 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
118 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
119 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
120 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
121 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
122 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
123 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
124 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
125 [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
126 };
127
128 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
129 [0] = GAUDI_QUEUE_ID_DMA_0_0,
130 [1] = GAUDI_QUEUE_ID_DMA_0_1,
131 [2] = GAUDI_QUEUE_ID_DMA_0_2,
132 [3] = GAUDI_QUEUE_ID_DMA_0_3,
133 [4] = GAUDI_QUEUE_ID_DMA_1_0,
134 [5] = GAUDI_QUEUE_ID_DMA_1_1,
135 [6] = GAUDI_QUEUE_ID_DMA_1_2,
136 [7] = GAUDI_QUEUE_ID_DMA_1_3,
137 };
138
139 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
140 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
141 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
142 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
143 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
144 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
145 [PACKET_REPEAT] = sizeof(struct packet_repeat),
146 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
147 [PACKET_FENCE] = sizeof(struct packet_fence),
148 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
149 [PACKET_NOP] = sizeof(struct packet_nop),
150 [PACKET_STOP] = sizeof(struct packet_stop),
151 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
152 [PACKET_WAIT] = sizeof(struct packet_wait),
153 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
154 };
155
static inline bool validate_packet_id(enum packet_id id)
157 {
158 switch (id) {
159 case PACKET_WREG_32:
160 case PACKET_WREG_BULK:
161 case PACKET_MSG_LONG:
162 case PACKET_MSG_SHORT:
163 case PACKET_CP_DMA:
164 case PACKET_REPEAT:
165 case PACKET_MSG_PROT:
166 case PACKET_FENCE:
167 case PACKET_LIN_DMA:
168 case PACKET_NOP:
169 case PACKET_STOP:
170 case PACKET_ARB_POINT:
171 case PACKET_WAIT:
172 case PACKET_LOAD_AND_EXE:
173 return true;
174 default:
175 return false;
176 }
177 }
178
179 static const char * const
180 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181 "tpc_address_exceed_slm",
182 "tpc_div_by_0",
183 "tpc_spu_mac_overflow",
184 "tpc_spu_addsub_overflow",
185 "tpc_spu_abs_overflow",
186 "tpc_spu_fp_dst_nan_inf",
187 "tpc_spu_fp_dst_denorm",
188 "tpc_vpu_mac_overflow",
189 "tpc_vpu_addsub_overflow",
190 "tpc_vpu_abs_overflow",
191 "tpc_vpu_fp_dst_nan_inf",
192 "tpc_vpu_fp_dst_denorm",
193 "tpc_assertions",
194 "tpc_illegal_instruction",
195 "tpc_pc_wrap_around",
196 "tpc_qm_sw_err",
197 "tpc_hbw_rresp_err",
198 "tpc_hbw_bresp_err",
199 "tpc_lbw_rresp_err",
200 "tpc_lbw_bresp_err"
201 };
202
203 static const char * const
204 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
205 "PQ AXI HBW error",
206 "CQ AXI HBW error",
207 "CP AXI HBW error",
208 "CP error due to undefined OPCODE",
209 "CP encountered STOP OPCODE",
210 "CP AXI LBW error",
211 "CP WRREG32 or WRBULK returned error",
212 "N/A",
213 "FENCE 0 inc over max value and clipped",
214 "FENCE 1 inc over max value and clipped",
215 "FENCE 2 inc over max value and clipped",
216 "FENCE 3 inc over max value and clipped",
217 "FENCE 0 dec under min value and clipped",
218 "FENCE 1 dec under min value and clipped",
219 "FENCE 2 dec under min value and clipped",
220 "FENCE 3 dec under min value and clipped"
221 };
222
223 static const char * const
224 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225 "Choice push while full error",
226 "Choice Q watchdog error",
227 "MSG AXI LBW returned with error"
228 };
229
230 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
344 };
345
346 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
347 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
348 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
349 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
350 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
351 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
352 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
353 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
354 { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
355 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
356 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
357 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
358 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
359 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
360 { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
361 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
362 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
363 { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
364 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
365 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
366 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
367 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
368 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
369 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
370 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
371 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
372 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
373 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
374 };
375
376 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
377 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
378 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
379 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
380 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
381 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
382 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
383 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
384 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
385 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
386 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
387 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
388 };
389
390 static s64 gaudi_state_dump_specs_props[] = {
391 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
392 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
393 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
394 [SP_MON_OBJ_WR_ADDR_LOW] =
395 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
396 [SP_MON_OBJ_WR_ADDR_HIGH] =
397 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
398 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
399 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
400 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
401 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
402 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
403 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
404 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
405 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
406 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
407 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
408 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
409 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
410 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
411 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
412 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
413 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
414 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
415 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
416 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
417 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
418 [SP_FENCE0_CNT_OFFSET] =
419 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
420 [SP_FENCE0_RDATA_OFFSET] =
421 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
422 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
423 [SP_NUM_CORES] = 1,
424 };
425
426 static const int gaudi_queue_id_to_engine_id[] = {
427 [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
428 [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
429 [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
430 [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
431 [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
432 [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
433 [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
434 [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
435 [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
436 [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
437 [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
438 [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
439 [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
440 [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
441 [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
442 [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
443 [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
444 [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
445 [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
446 [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
447 [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
448 [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
449 [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
450 [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
451 [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
452 [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
453 [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
454 [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
455 [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
456 };
457
458 /* The order here is opposite to the order of the indexing in the h/w.
459 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
460 */
461 static const char * const gaudi_sync_manager_names[] = {
462 "SYNC_MGR_E_N",
463 "SYNC_MGR_W_N",
464 "SYNC_MGR_E_S",
465 "SYNC_MGR_W_S",
466 NULL
467 };
468
469 struct ecc_info_extract_params {
470 u64 block_address;
471 u32 num_memories;
472 bool derr;
473 };
474
475 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
476 u64 phys_addr);
477 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
478 struct hl_cs_job *job);
479 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
480 u32 size, u64 val);
481 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
482 u32 num_regs, u32 val);
483 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
484 u32 tpc_id);
485 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
486 static int gaudi_cpucp_info_get(struct hl_device *hdev);
487 static void gaudi_disable_clock_gating(struct hl_device *hdev);
488 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
489 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
490 u32 size, bool eb);
491 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
492 struct hl_gen_wait_properties *prop);
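/* External queues (PCI DMA 0/1) act as collective masters; the DMA5, TPC7 and
 * NIC queues act as collective slaves. All other queues do not take part in
 * collective operations.
 */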
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
495 {
496 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
497 return HL_COLLECTIVE_MASTER;
498
499 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
500 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
501 return HL_COLLECTIVE_SLAVE;
502
503 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
504 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
505 return HL_COLLECTIVE_SLAVE;
506
507 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
508 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
509 return HL_COLLECTIVE_SLAVE;
510
511 return HL_COLLECTIVE_NOT_SUPPORTED;
512 }
513
static inline void set_default_power_values(struct hl_device *hdev)
515 {
516 struct asic_fixed_properties *prop = &hdev->asic_prop;
517
518 if (hdev->card_type == cpucp_card_type_pmc) {
519 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
520
521 if (prop->fw_security_enabled)
522 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
523 else
524 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
525 } else {
526 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
527 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
528 }
529 }
530
static int gaudi_set_fixed_properties(struct hl_device *hdev)
532 {
533 struct asic_fixed_properties *prop = &hdev->asic_prop;
534 u32 num_sync_stream_queues = 0;
535 int i;
536
537 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
538 prop->hw_queues_props = kcalloc(prop->max_queues,
539 sizeof(struct hw_queue_properties),
540 GFP_KERNEL);
541
542 if (!prop->hw_queues_props)
543 return -ENOMEM;
544
545 for (i = 0 ; i < prop->max_queues ; i++) {
546 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
547 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
548 prop->hw_queues_props[i].driver_only = 0;
549 prop->hw_queues_props[i].supports_sync_stream = 1;
550 prop->hw_queues_props[i].cb_alloc_flags =
551 CB_ALLOC_KERNEL;
552 num_sync_stream_queues++;
553 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
554 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
555 prop->hw_queues_props[i].driver_only = 1;
556 prop->hw_queues_props[i].supports_sync_stream = 0;
557 prop->hw_queues_props[i].cb_alloc_flags =
558 CB_ALLOC_KERNEL;
559 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
560 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
561 prop->hw_queues_props[i].driver_only = 0;
562 prop->hw_queues_props[i].supports_sync_stream = 0;
563 prop->hw_queues_props[i].cb_alloc_flags =
564 CB_ALLOC_USER;
565
566 }
567 prop->hw_queues_props[i].collective_mode =
568 get_collective_mode(hdev, i);
569 }
570
571 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
572 prop->cfg_base_address = CFG_BASE;
573 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
574 prop->host_base_address = HOST_PHYS_BASE;
575 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
576 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
577 prop->completion_mode = HL_COMPLETION_MODE_JOB;
578 prop->collective_first_sob = 0;
579 prop->collective_first_mon = 0;
580
581 /* 2 SOBs per internal queue stream are reserved for collective */
582 prop->sync_stream_first_sob =
583 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
584 * QMAN_STREAMS * HL_RSVD_SOBS;
585
	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
589 prop->sync_stream_first_mon =
590 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
591 (NUMBER_OF_EXT_HW_QUEUES * 2);
592
593 prop->dram_base_address = DRAM_PHYS_BASE;
594 prop->dram_size = GAUDI_HBM_SIZE_32GB;
595 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
596 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
597
598 prop->sram_base_address = SRAM_BASE_ADDR;
599 prop->sram_size = SRAM_SIZE;
600 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
601 prop->sram_user_base_address =
602 prop->sram_base_address + SRAM_USER_BASE_OFFSET;
603
604 prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
605 prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
606
607 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
608 if (hdev->pldm)
609 prop->mmu_pgt_size = 0x800000; /* 8MB */
610 else
611 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
612 prop->mmu_pte_size = HL_PTE_SIZE;
613 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
614 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
615 prop->dram_page_size = PAGE_SIZE_2MB;
616 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
617 prop->dram_supports_virtual_memory = false;
618
619 prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
620 prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
621 prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
622 prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
623 prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
624 prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
625 prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
626 prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
627 prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
628 prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
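	/* The host VA range is split in half: the lower half is covered by the
	 * PMMU and the upper half by the DMMU
	 */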
629 prop->pmmu.start_addr = VA_HOST_SPACE_START;
630 prop->pmmu.end_addr =
631 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
632 prop->pmmu.page_size = PAGE_SIZE_4KB;
633 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
634 prop->pmmu.last_mask = LAST_MASK;
635 /* TODO: will be duplicated until implementing per-MMU props */
636 prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
637 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
638
	/* PMMU and HPMMU are the same except for the page size */
640 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
641 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
642
643 /* shifts and masks are the same in PMMU and DMMU */
644 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
645 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
646 prop->dmmu.end_addr = VA_HOST_SPACE_END;
647 prop->dmmu.page_size = PAGE_SIZE_2MB;
648
649 prop->cfg_size = CFG_SIZE;
650 prop->max_asid = MAX_ASID;
651 prop->num_of_events = GAUDI_EVENT_SIZE;
652 prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
653 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
654
655 set_default_power_values(hdev);
656
657 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
658 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
659
660 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
661 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
662
663 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
664 CARD_NAME_MAX_LEN);
665
666 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
667
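	/* The first SOB/monitor available to the user comes right after the
	 * ones reserved for the sync stream and collective flows
	 */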
668 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
669 prop->sync_stream_first_sob +
670 (num_sync_stream_queues * HL_RSVD_SOBS);
671 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
672 prop->sync_stream_first_mon +
673 (num_sync_stream_queues * HL_RSVD_MONS);
674
675 prop->first_available_user_interrupt = USHRT_MAX;
676 prop->tpc_interrupt_id = USHRT_MAX;
677
678 /* single msi */
679 prop->eq_interrupt_id = 0;
680
681 for (i = 0 ; i < HL_MAX_DCORES ; i++)
682 prop->first_available_cq[i] = USHRT_MAX;
683
684 prop->fw_cpu_boot_dev_sts0_valid = false;
685 prop->fw_cpu_boot_dev_sts1_valid = false;
686 prop->hard_reset_done_by_fw = false;
687 prop->gic_interrupts_enable = true;
688
689 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
690
691 prop->clk_pll_index = HL_GAUDI_MME_PLL;
692 prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
693
694 prop->use_get_power_for_reset_history = true;
695
696 prop->configurable_stop_on_err = true;
697
698 prop->set_max_power_on_device_init = true;
699
700 prop->dma_mask = 48;
701
702 prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
703
704 return 0;
705 }
706
static int gaudi_pci_bars_map(struct hl_device *hdev)
708 {
709 static const char * const name[] = {"SRAM", "CFG", "HBM"};
710 bool is_wc[3] = {false, false, true};
711 int rc;
712
713 rc = hl_pci_bars_map(hdev, name, is_wc);
714 if (rc)
715 return rc;
716
717 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
718 (CFG_BASE - SPI_FLASH_BASE_ADDR);
719
720 return 0;
721 }
722
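/* Re-point the HBM BAR (inbound region 2 / BAR 4) at the given device address.
 * Returns the previous BAR base so the caller can restore it later, or U64_MAX
 * if the iATU is owned by the firmware or the region could not be configured.
 */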
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
724 {
725 struct gaudi_device *gaudi = hdev->asic_specific;
726 struct hl_inbound_pci_region pci_region;
727 u64 old_addr = addr;
728 int rc;
729
730 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
731 return old_addr;
732
733 if (hdev->asic_prop.iatu_done_by_fw)
734 return U64_MAX;
735
736 /* Inbound Region 2 - Bar 4 - Point to HBM */
737 pci_region.mode = PCI_BAR_MATCH_MODE;
738 pci_region.bar = HBM_BAR_ID;
739 pci_region.addr = addr;
740 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
741 if (rc)
742 return U64_MAX;
743
744 if (gaudi) {
745 old_addr = gaudi->hbm_bar_cur_addr;
746 gaudi->hbm_bar_cur_addr = addr;
747 }
748
749 return old_addr;
750 }
751
static int gaudi_init_iatu(struct hl_device *hdev)
753 {
754 struct hl_inbound_pci_region inbound_region;
755 struct hl_outbound_pci_region outbound_region;
756 int rc;
757
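	/* Nothing to do if the firmware has already configured the iATU regions */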
758 if (hdev->asic_prop.iatu_done_by_fw)
759 return 0;
760
761 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
762 inbound_region.mode = PCI_BAR_MATCH_MODE;
763 inbound_region.bar = SRAM_BAR_ID;
764 inbound_region.addr = SRAM_BASE_ADDR;
765 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
766 if (rc)
767 goto done;
768
769 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
770 inbound_region.mode = PCI_BAR_MATCH_MODE;
771 inbound_region.bar = CFG_BAR_ID;
772 inbound_region.addr = SPI_FLASH_BASE_ADDR;
773 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
774 if (rc)
775 goto done;
776
777 /* Inbound Region 2 - Bar 4 - Point to HBM */
778 inbound_region.mode = PCI_BAR_MATCH_MODE;
779 inbound_region.bar = HBM_BAR_ID;
780 inbound_region.addr = DRAM_PHYS_BASE;
781 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
782 if (rc)
783 goto done;
784
785 /* Outbound Region 0 - Point to Host */
786 outbound_region.addr = HOST_PHYS_BASE;
787 outbound_region.size = HOST_PHYS_SIZE;
788 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
789
790 done:
791 return rc;
792 }
793
static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
795 {
796 return RREG32(mmHW_STATE);
797 }
798
static int gaudi_early_init(struct hl_device *hdev)
800 {
801 struct asic_fixed_properties *prop = &hdev->asic_prop;
802 struct pci_dev *pdev = hdev->pdev;
803 resource_size_t pci_bar_size;
804 u32 fw_boot_status;
805 int rc;
806
807 rc = gaudi_set_fixed_properties(hdev);
808 if (rc) {
809 dev_err(hdev->dev, "Failed setting fixed properties\n");
810 return rc;
811 }
812
813 /* Check BAR sizes */
814 pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
815
816 if (pci_bar_size != SRAM_BAR_SIZE) {
817 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
818 SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
819 rc = -ENODEV;
820 goto free_queue_props;
821 }
822
823 pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
824
825 if (pci_bar_size != CFG_BAR_SIZE) {
826 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
827 CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
828 rc = -ENODEV;
829 goto free_queue_props;
830 }
831
832 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
833 hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
834
835 /* If FW security is enabled at this point it means no access to ELBI */
836 if (hdev->asic_prop.fw_security_enabled) {
837 hdev->asic_prop.iatu_done_by_fw = true;
838
		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so at this stage
		 * the decision can only be taken based on PCI ID security.
		 */
843 hdev->asic_prop.gic_interrupts_enable = false;
844 goto pci_init;
845 }
846
847 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
848 &fw_boot_status);
849 if (rc)
850 goto free_queue_props;
851
852 /* Check whether FW is configuring iATU */
853 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
854 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
855 hdev->asic_prop.iatu_done_by_fw = true;
856
857 pci_init:
858 rc = hl_pci_init(hdev);
859 if (rc)
860 goto free_queue_props;
861
862 /* Before continuing in the initialization, we need to read the preboot
863 * version to determine whether we run with a security-enabled firmware
864 */
865 rc = hl_fw_read_preboot_status(hdev);
866 if (rc) {
867 if (hdev->reset_on_preboot_fail)
868 /* we are already on failure flow, so don't check if hw_fini fails. */
869 hdev->asic_funcs->hw_fini(hdev, true, false);
870 goto pci_fini;
871 }
872
873 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
874 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
875 rc = hdev->asic_funcs->hw_fini(hdev, true, false);
876 if (rc) {
877 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
878 goto pci_fini;
879 }
880 }
881
882 return 0;
883
884 pci_fini:
885 hl_pci_fini(hdev);
886 free_queue_props:
887 kfree(hdev->asic_prop.hw_queues_props);
888 return rc;
889 }
890
static int gaudi_early_fini(struct hl_device *hdev)
892 {
893 kfree(hdev->asic_prop.hw_queues_props);
894 hl_pci_fini(hdev);
895
896 return 0;
897 }
898
/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
906 {
907 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
908 struct asic_fixed_properties *prop = &hdev->asic_prop;
909 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
910 int rc;
911
912 if ((hdev->fw_components & FW_TYPE_LINUX) &&
913 (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
914 struct gaudi_device *gaudi = hdev->asic_specific;
915
916 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
917 return 0;
918
919 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
920
921 if (rc)
922 return rc;
923
924 freq = pll_freq_arr[2];
925 } else {
926 /* Backward compatibility */
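		/* Derive the frequency directly from the PLL registers:
		 * pll_clk = PLL_REF_CLK * (NF + 1) / ((NR + 1) * (OD + 1)),
		 * optionally divided further by (div_fctr + 1), depending on
		 * the divider select value.
		 */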
927 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
928 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
929 nr = RREG32(mmPSOC_CPU_PLL_NR);
930 nf = RREG32(mmPSOC_CPU_PLL_NF);
931 od = RREG32(mmPSOC_CPU_PLL_OD);
932
933 if (div_sel == DIV_SEL_REF_CLK ||
934 div_sel == DIV_SEL_DIVIDED_REF) {
935 if (div_sel == DIV_SEL_REF_CLK)
936 freq = PLL_REF_CLK;
937 else
938 freq = PLL_REF_CLK / (div_fctr + 1);
939 } else if (div_sel == DIV_SEL_PLL_CLK ||
940 div_sel == DIV_SEL_DIVIDED_PLL) {
941 pll_clk = PLL_REF_CLK * (nf + 1) /
942 ((nr + 1) * (od + 1));
943 if (div_sel == DIV_SEL_PLL_CLK)
944 freq = pll_clk;
945 else
946 freq = pll_clk / (div_fctr + 1);
947 } else {
948 dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
949 freq = 0;
950 }
951 }
952
953 prop->psoc_timestamp_frequency = freq;
954 prop->psoc_pci_pll_nr = nr;
955 prop->psoc_pci_pll_nf = nf;
956 prop->psoc_pci_pll_od = od;
957 prop->psoc_pci_pll_div_factor = div_fctr;
958
959 return 0;
960 }
961
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
964 {
965 struct asic_fixed_properties *prop = &hdev->asic_prop;
966 struct packet_lin_dma *init_tpc_mem_pkt;
967 struct hl_cs_job *job;
968 struct hl_cb *cb;
969 u64 dst_addr;
970 u32 cb_size, ctl;
971 u8 tpc_id;
972 int rc;
973
974 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
975 if (!cb)
976 return -EFAULT;
977
978 init_tpc_mem_pkt = cb->kernel_address;
979 cb_size = sizeof(*init_tpc_mem_pkt);
980 memset(init_tpc_mem_pkt, 0, cb_size);
981
982 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
983
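	/* Build the LIN_DMA control word: linear mode with both register and
	 * message barriers set
	 */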
984 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
985 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
986 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
987 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
988
989 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
990
991 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
992
993 /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
994 dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
995 round_up(prop->sram_user_base_address, SZ_8K));
996 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
997
998 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
999 if (!job) {
1000 dev_err(hdev->dev, "Failed to allocate a new job\n");
1001 rc = -ENOMEM;
1002 goto release_cb;
1003 }
1004
1005 job->id = 0;
1006 job->user_cb = cb;
1007 atomic_inc(&job->user_cb->cs_cnt);
1008 job->user_cb_size = cb_size;
1009 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1010 job->patched_cb = job->user_cb;
1011 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1012
1013 hl_debugfs_add_job(hdev, job);
1014
1015 rc = gaudi_send_job_on_qman0(hdev, job);
1016
1017 if (rc)
1018 goto free_job;
1019
1020 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1021 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1022 if (rc)
1023 break;
1024 }
1025
1026 free_job:
1027 hl_userptr_delete_list(hdev, &job->userptr_list);
1028 hl_debugfs_remove_job(hdev, job);
1029 kfree(job);
1030 atomic_dec(&cb->cs_cnt);
1031
1032 release_cb:
1033 hl_cb_put(cb);
1034 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1035
1036 return rc;
1037 }
1038
1039 /*
1040 * gaudi_init_tpc_mem() - Initialize TPC memories.
1041 * @hdev: Pointer to hl_device structure.
1042 *
1043 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1044 *
1045 * Return: 0 for success, negative value for error.
1046 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
1048 {
1049 const struct firmware *fw;
1050 size_t fw_size;
1051 void *cpu_addr;
1052 dma_addr_t dma_handle;
1053 int rc, count = 5;
1054
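	/* request_firmware() may be interrupted by a signal; retry a few times
	 * before giving up
	 */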
1055 again:
1056 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1057 if (rc == -EINTR && count-- > 0) {
1058 msleep(50);
1059 goto again;
1060 }
1061
1062 if (rc) {
1063 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1064 GAUDI_TPC_FW_FILE);
1065 goto out;
1066 }
1067
1068 fw_size = fw->size;
1069 cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1070 if (!cpu_addr) {
1071 dev_err(hdev->dev,
1072 "Failed to allocate %zu of dma memory for TPC kernel\n",
1073 fw_size);
1074 rc = -ENOMEM;
1075 goto out;
1076 }
1077
1078 memcpy(cpu_addr, fw->data, fw_size);
1079
1080 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1081
1082 hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1083
1084 out:
1085 release_firmware(fw);
1086 return rc;
1087 }
1088
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1090 {
1091 struct gaudi_device *gaudi = hdev->asic_specific;
1092 struct gaudi_collective_properties *prop = &gaudi->collective_props;
1093 struct hl_hw_queue *q;
1094 u32 i, sob_id, sob_group_id, queue_id;
1095
1096 /* Iterate through SOB groups and assign a SOB for each slave queue */
1097 sob_group_id =
1098 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1099 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1100
1101 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1102 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1103 q = &hdev->kernel_queues[queue_id + (4 * i)];
1104 q->sync_stream_prop.collective_sob_id = sob_id + i;
1105 }
1106
	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
1110 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1111 q = &hdev->kernel_queues[queue_id];
1112 q->sync_stream_prop.collective_sob_id =
1113 sob_id + NIC_NUMBER_OF_ENGINES;
1114
1115 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1116 q = &hdev->kernel_queues[queue_id];
1117 q->sync_stream_prop.collective_sob_id =
1118 sob_id + NIC_NUMBER_OF_ENGINES;
1119 }
1120
static void gaudi_sob_group_hw_reset(struct kref *ref)
1122 {
1123 struct gaudi_hw_sob_group *hw_sob_group =
1124 container_of(ref, struct gaudi_hw_sob_group, kref);
1125 struct hl_device *hdev = hw_sob_group->hdev;
1126 int i;
1127
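	/* Clear every SOB in the group by writing 0 to its object register */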
1128 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1129 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1130 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1131
1132 kref_init(&hw_sob_group->kref);
1133 }
1134
static void gaudi_sob_group_reset_error(struct kref *ref)
1136 {
1137 struct gaudi_hw_sob_group *hw_sob_group =
1138 container_of(ref, struct gaudi_hw_sob_group, kref);
1139 struct hl_device *hdev = hw_sob_group->hdev;
1140
1141 dev_crit(hdev->dev,
1142 "SOB release shouldn't be called here, base_sob_id: %d\n",
1143 hw_sob_group->base_sob_id);
1144 }
1145
static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1147 {
1148 struct gaudi_collective_properties *prop;
1149 int i;
1150
1151 prop = &gaudi->collective_props;
1152
1153 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1154
1155 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1156 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1157 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1158 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1159 /* Set collective engine bit */
1160 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1161 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1162 }
1163
static int gaudi_collective_init(struct hl_device *hdev)
1165 {
1166 u32 i, sob_id, reserved_sobs_per_group;
1167 struct gaudi_collective_properties *prop;
1168 struct gaudi_device *gaudi;
1169
1170 gaudi = hdev->asic_specific;
1171 prop = &gaudi->collective_props;
1172 sob_id = hdev->asic_prop.collective_first_sob;
1173
1174 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1175 reserved_sobs_per_group =
1176 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1177
1178 /* Init SOB groups */
1179 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1180 prop->hw_sob_group[i].hdev = hdev;
1181 prop->hw_sob_group[i].base_sob_id = sob_id;
1182 sob_id += reserved_sobs_per_group;
1183 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1184 }
1185
1186 for (i = 0 ; i < QMAN_STREAMS; i++) {
1187 prop->next_sob_group_val[i] = 1;
1188 prop->curr_sob_group_idx[i] = 0;
1189 gaudi_collective_map_sobs(hdev, i);
1190 }
1191
1192 gaudi_collective_mstr_sob_mask_set(gaudi);
1193
1194 return 0;
1195 }
1196
static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1198 {
1199 struct gaudi_device *gaudi = hdev->asic_specific;
1200 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1201
1202 kref_put(&cprop->hw_sob_group[sob_group].kref,
1203 gaudi_sob_group_hw_reset);
1204 }
1205
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1208 {
1209 u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1210 struct gaudi_collective_properties *cprop;
1211 struct hl_gen_wait_properties wait_prop;
1212 struct hl_sync_stream_properties *prop;
1213 struct gaudi_device *gaudi;
1214
1215 gaudi = hdev->asic_specific;
1216 cprop = &gaudi->collective_props;
1217 queue_id = job->hw_queue_id;
1218 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1219
1220 master_sob_base =
1221 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1222 master_monitor = prop->collective_mstr_mon_id[0];
1223
1224 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1225
1226 dev_dbg(hdev->dev,
1227 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1228 master_sob_base, cprop->mstr_sob_mask[0],
1229 cprop->next_sob_group_val[stream],
1230 master_monitor, queue_id);
1231
1232 wait_prop.data = (void *) job->patched_cb;
1233 wait_prop.sob_base = master_sob_base;
1234 wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1235 wait_prop.sob_val = cprop->next_sob_group_val[stream];
1236 wait_prop.mon_id = master_monitor;
1237 wait_prop.q_idx = queue_id;
1238 wait_prop.size = cb_size;
1239 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1240
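	/* The second monitor covers the remaining SOBs in the group (NICs 8-9
	 * and the reduction engine)
	 */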
1241 master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1242 master_monitor = prop->collective_mstr_mon_id[1];
1243
1244 dev_dbg(hdev->dev,
1245 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1246 master_sob_base, cprop->mstr_sob_mask[1],
1247 cprop->next_sob_group_val[stream],
1248 master_monitor, queue_id);
1249
1250 wait_prop.sob_base = master_sob_base;
1251 wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1252 wait_prop.mon_id = master_monitor;
1253 wait_prop.size = cb_size;
1254 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1255 }
1256
static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1259 {
1260 struct hl_gen_wait_properties wait_prop;
1261 struct hl_sync_stream_properties *prop;
1262 u32 queue_id, cb_size = 0;
1263
1264 queue_id = job->hw_queue_id;
1265 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1266
1267 if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
1272 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1273 cs_cmpl);
1274
1275 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
1276 job->cs->sequence,
1277 cs_cmpl->hw_sob->sob_id,
1278 cs_cmpl->sob_val);
1279 }
1280
1281 /* Add to wait CBs using slave monitor */
1282 wait_prop.data = (void *) job->user_cb;
1283 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1284 wait_prop.sob_mask = 0x1;
1285 wait_prop.sob_val = cs_cmpl->sob_val;
1286 wait_prop.mon_id = prop->collective_slave_mon_id;
1287 wait_prop.q_idx = queue_id;
1288 wait_prop.size = cb_size;
1289
1290 dev_dbg(hdev->dev,
1291 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1292 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1293 prop->collective_slave_mon_id, queue_id);
1294
1295 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1296
1297 dev_dbg(hdev->dev,
1298 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1299 prop->collective_sob_id, queue_id);
1300
1301 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1302 prop->collective_sob_id, cb_size, false);
1303 }
1304
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1306 {
1307 struct hl_cs_compl *signal_cs_cmpl =
1308 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1309 struct hl_cs_compl *cs_cmpl =
1310 container_of(cs->fence, struct hl_cs_compl, base_fence);
1311 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1312 struct gaudi_collective_properties *cprop;
1313 u32 stream, queue_id, sob_group_offset;
1314 struct gaudi_device *gaudi;
1315 struct hl_device *hdev;
1316 struct hl_cs_job *job;
1317 struct hl_ctx *ctx;
1318
1319 ctx = cs->ctx;
1320 hdev = ctx->hdev;
1321 gaudi = hdev->asic_specific;
1322 cprop = &gaudi->collective_props;
1323
1324 if (cs->encaps_signals) {
1325 cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before starting to go over the jobs
		 * of the master/slaves. The sob_value will be taken later on
		 * in gaudi_collective_slave_init_job, depending on each
		 * job's wait offset value.
		 */
1332 cs_cmpl->sob_val = 0;
1333 } else {
1334 /* copy the SOB id and value of the signal CS */
1335 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1336 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1337 }
1338
	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevents the hw_sob refcount value,
	 * changed by the signal/wait flows, from going out of sync.
	 */
1351 spin_lock(&signal_cs_cmpl->lock);
1352
1353 if (completion_done(&cs->signal_fence->completion)) {
1354 spin_unlock(&signal_cs_cmpl->lock);
1355 return -EINVAL;
1356 }
1357 /* Increment kref since all slave queues are now waiting on it */
1358 kref_get(&cs_cmpl->hw_sob->kref);
1359
1360 spin_unlock(&signal_cs_cmpl->lock);
1361
1362 /* Calculate the stream from collective master queue (1st job) */
1363 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1364 stream = job->hw_queue_id % 4;
1365 sob_group_offset =
1366 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1367
1368 list_for_each_entry(job, &cs->job_list, cs_node) {
1369 queue_id = job->hw_queue_id;
1370
1371 if (hdev->kernel_queues[queue_id].collective_mode ==
1372 HL_COLLECTIVE_MASTER)
1373 gaudi_collective_master_init_job(hdev, job, stream,
1374 sob_group_offset);
1375 else
1376 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1377 }
1378
1379 cs_cmpl->sob_group = sob_group_offset;
1380
1381 /* Handle sob group kref and wraparound */
1382 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1383 cprop->next_sob_group_val[stream]++;
1384
1385 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1386 /*
1387 * Decrement as we reached the max value.
1388 * The release function won't be called here as we've
1389 * just incremented the refcount.
1390 */
1391 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1392 gaudi_sob_group_reset_error);
1393 cprop->next_sob_group_val[stream] = 1;
1394 /* only two SOBs are currently in use */
1395 cprop->curr_sob_group_idx[stream] =
1396 (cprop->curr_sob_group_idx[stream] + 1) &
1397 (HL_RSVD_SOBS - 1);
1398
1399 gaudi_collective_map_sobs(hdev, stream);
1400
1401 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1402 cprop->curr_sob_group_idx[stream], stream);
1403 }
1404
1405 mb();
1406 hl_fence_put(cs->signal_fence);
1407 cs->signal_fence = NULL;
1408
1409 return 0;
1410 }
1411
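/* Return the extra space a patched CB needs: two MSG_PROT packets (completion
 * and MSI), plus padding up to the next cache line boundary if the packets do
 * not fit in the tail of the user CB's last cache line.
 */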
static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1413 {
1414 u32 cacheline_end, additional_commands;
1415
1416 cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1417 additional_commands = sizeof(struct packet_msg_prot) * 2;
1418
1419 if (user_cb_size + additional_commands > cacheline_end)
1420 return cacheline_end - user_cb_size + additional_commands;
1421 else
1422 return additional_commands;
1423 }
1424
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
1429 {
1430 struct hw_queue_properties *hw_queue_prop;
1431 struct hl_cs_counters_atomic *cntr;
1432 struct hl_cs_job *job;
1433 struct hl_cb *cb;
1434 u32 cb_size;
1435 bool patched_cb;
1436
1437 cntr = &hdev->aggregated_cs_counters;
1438
1439 if (mode == HL_COLLECTIVE_MASTER) {
1440 /* CB size of collective master queue contains
1441 * 4 msg short packets for monitor 1 configuration
1442 * 1 fence packet
1443 * 4 msg short packets for monitor 2 configuration
1444 * 1 fence packet
1445 * 2 msg prot packets for completion and MSI
1446 */
1447 cb_size = sizeof(struct packet_msg_short) * 8 +
1448 sizeof(struct packet_fence) * 2 +
1449 sizeof(struct packet_msg_prot) * 2;
1450 patched_cb = true;
1451 } else {
1452 /* CB size of collective slave queues contains
1453 * 4 msg short packets for monitor configuration
1454 * 1 fence packet
1455 * 1 additional msg short packet for sob signal
1456 */
1457 cb_size = sizeof(struct packet_msg_short) * 5 +
1458 sizeof(struct packet_fence);
1459 patched_cb = false;
1460 }
1461
1462 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1463 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1464 if (!job) {
1465 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1466 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1467 dev_err(hdev->dev, "Failed to allocate a new job\n");
1468 return -ENOMEM;
1469 }
1470
1471 /* Allocate internal mapped CB for non patched CBs */
1472 cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
1473 if (!cb) {
1474 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1475 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1476 kfree(job);
1477 return -EFAULT;
1478 }
1479
1480 job->id = 0;
1481 job->cs = cs;
1482 job->user_cb = cb;
1483 atomic_inc(&job->user_cb->cs_cnt);
1484 job->user_cb_size = cb_size;
1485 job->hw_queue_id = queue_id;
1486
	/* since it's guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
1491 if (cs->encaps_signals)
1492 job->encaps_sig_wait_offset = encaps_signal_offset;
1493
	/*
	 * No need for parsing, the user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
1500 if (patched_cb)
1501 job->patched_cb = job->user_cb;
1502 else
1503 job->patched_cb = NULL;
1504
1505 job->job_cb_size = job->user_cb_size;
1506 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1507
1508 /* increment refcount as for external queues we get completion */
1509 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1510 cs_get(cs);
1511
1512 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1513
1514 list_add_tail(&job->cs_node, &cs->job_list);
1515
1516 hl_debugfs_add_job(hdev, job);
1517
1518 return 0;
1519 }
1520
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
1525 {
1526 struct gaudi_device *gaudi = hdev->asic_specific;
1527 struct hw_queue_properties *hw_queue_prop;
1528 u32 queue_id, collective_queue, num_jobs;
1529 u32 stream, nic_queue, nic_idx = 0;
1530 bool skip;
1531 int i, rc = 0;
1532
1533 /* Verify wait queue id is configured as master */
1534 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1535 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1536 dev_err(hdev->dev,
1537 "Queue %d is not configured as collective master\n",
1538 wait_queue_id);
1539 return -EINVAL;
1540 }
1541
1542 /* Verify engine id is supported */
1543 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1544 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1545 dev_err(hdev->dev,
1546 "Collective wait does not support engine %u\n",
1547 collective_engine_id);
1548 return -EINVAL;
1549 }
1550
1551 stream = wait_queue_id % 4;
1552
1553 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1554 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1555 else
1556 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1557
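/* One job goes to the collective master queue and one job per SOB in the
 * group, i.e. per collective slave (the NIC engines and the DMA5/TPC7
 * reduction engine).
 */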
1558 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1559 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1560
1561 /* The first job goes to the collective master queue; it will wait for
1562 * the collective slave queues to finish execution.
1563 * The synchronization is done using two monitors:
1564 * the first monitor for NICs 0-7, the second monitor for NICs 8-9 and
1565 * the reduction engine (DMA5/TPC7).
1566 *
1567 * The rest of the jobs go to the collective slave queues, which will
1568 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1569 */
1570 for (i = 0 ; i < num_jobs ; i++) {
1571 if (i == 0) {
1572 queue_id = wait_queue_id;
1573 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1574 HL_COLLECTIVE_MASTER, queue_id,
1575 wait_queue_id, encaps_signal_offset);
1576 } else {
1577 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1578 if (gaudi->hw_cap_initialized &
1579 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1580 skip = false;
1581 else
1582 skip = true;
1583
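/* Each NIC exposes 4 streams, so advancing by 4 moves to the same
 * stream of the next NIC engine.
 */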
1584 queue_id = nic_queue;
1585 nic_queue += 4;
1586 nic_idx++;
1587
1588 if (skip)
1589 continue;
1590 } else {
1591 queue_id = collective_queue;
1592 }
1593
1594 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1595 HL_COLLECTIVE_SLAVE, queue_id,
1596 wait_queue_id, encaps_signal_offset);
1597 }
1598
1599 if (rc)
1600 return rc;
1601 }
1602
1603 return rc;
1604 }
1605
1606 static int gaudi_late_init(struct hl_device *hdev)
1607 {
1608 struct gaudi_device *gaudi = hdev->asic_specific;
1609 int rc;
1610
1611 rc = gaudi->cpucp_info_get(hdev);
1612 if (rc) {
1613 dev_err(hdev->dev, "Failed to get cpucp info\n");
1614 return rc;
1615 }
1616
1617 if ((hdev->card_type == cpucp_card_type_pci) &&
1618 (hdev->nic_ports_mask & 0x3)) {
1619 dev_info(hdev->dev,
1620 "PCI card detected, only 8 ports are enabled\n");
1621 hdev->nic_ports_mask &= ~0x3;
1622
1623 /* Stop and disable unused NIC QMANs */
1624 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1625 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1626 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1627
1628 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1629 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1630 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1631
1632 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1633 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1634
1635 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1636 }
1637
1638 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1639 if (rc) {
1640 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1641 return rc;
1642 }
1643
1644 /* Scrub both SRAM and DRAM */
1645 rc = hdev->asic_funcs->scrub_device_mem(hdev);
1646 if (rc)
1647 goto disable_pci_access;
1648
1649 rc = gaudi_fetch_psoc_frequency(hdev);
1650 if (rc) {
1651 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1652 goto disable_pci_access;
1653 }
1654
1655 rc = gaudi_mmu_clear_pgt_range(hdev);
1656 if (rc) {
1657 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1658 goto disable_pci_access;
1659 }
1660
1661 rc = gaudi_init_tpc_mem(hdev);
1662 if (rc) {
1663 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1664 goto disable_pci_access;
1665 }
1666
1667 rc = gaudi_collective_init(hdev);
1668 if (rc) {
1669 dev_err(hdev->dev, "Failed to init collective\n");
1670 goto disable_pci_access;
1671 }
1672
1673 /* We only support a single ASID for the user, so for the sake of optimization, just
1674 * initialize the ASID one time during device initialization with the fixed value of 1
1675 */
1676 gaudi_mmu_prepare(hdev, 1);
1677
1678 hl_fw_set_pll_profile(hdev);
1679
1680 return 0;
1681
1682 disable_pci_access:
1683 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1684
1685 return rc;
1686 }
1687
1688 static void gaudi_late_fini(struct hl_device *hdev)
1689 {
1690 hl_hwmon_release_resources(hdev);
1691 }
1692
1693 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1694 {
1695 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1696 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1697 int i, j, rc = 0;
1698
1699 /*
1700 * The device CPU works with 40-bit addresses, while bit 39 must be set
1701 * to '1' when accessing the host.
1702 * Bits 49:39 of the full host address are saved for a later
1703 * configuration of the HW to perform extension to 50 bits.
1704 * Because there is a single HW register that holds the extension bits,
1705 * these bits must be identical across the entire allocated range.
1706 */
1707
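/* Retry the allocation until the returned range has identical PCI MSB
 * bits at its start and end addresses; failed attempts are freed below.
 */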
1708 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1709 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1710 &dma_addr_arr[i],
1711 GFP_KERNEL | __GFP_ZERO);
1712 if (!virt_addr_arr[i]) {
1713 rc = -ENOMEM;
1714 goto free_dma_mem_arr;
1715 }
1716
1717 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1718 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1719 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1720 break;
1721 }
1722
1723 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1724 dev_err(hdev->dev,
1725 "MSB of CPU accessible DMA memory are not identical in all range\n");
1726 rc = -EFAULT;
1727 goto free_dma_mem_arr;
1728 }
1729
1730 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1731 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1732 hdev->cpu_pci_msb_addr =
1733 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1734
1735 if (!hdev->asic_prop.fw_security_enabled)
1736 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1737
1738 free_dma_mem_arr:
1739 for (j = 0 ; j < i ; j++)
1740 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1741 dma_addr_arr[j]);
1742
1743 return rc;
1744 }
1745
1746 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1747 {
1748 struct gaudi_device *gaudi = hdev->asic_specific;
1749 struct gaudi_internal_qman_info *q;
1750 u32 i;
1751
1752 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1753 q = &gaudi->internal_qmans[i];
1754 if (!q->pq_kernel_addr)
1755 continue;
1756 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1757 }
1758 }
1759
1760 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1761 {
1762 struct gaudi_device *gaudi = hdev->asic_specific;
1763 struct gaudi_internal_qman_info *q;
1764 int rc, i;
1765
1766 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1767 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1768 continue;
1769
1770 q = &gaudi->internal_qmans[i];
1771
1772 switch (i) {
1773 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1774 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1775 break;
1776 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1777 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1778 break;
1779 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1780 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1781 break;
1782 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1783 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1784 break;
1785 default:
1786 dev_err(hdev->dev, "Bad internal queue index %d", i);
1787 rc = -EINVAL;
1788 goto free_internal_qmans_pq_mem;
1789 }
1790
1791 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1792 GFP_KERNEL | __GFP_ZERO);
1793 if (!q->pq_kernel_addr) {
1794 rc = -ENOMEM;
1795 goto free_internal_qmans_pq_mem;
1796 }
1797 }
1798
1799 return 0;
1800
1801 free_internal_qmans_pq_mem:
1802 gaudi_free_internal_qmans_pq_mem(hdev);
1803 return rc;
1804 }
1805
1806 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1807 {
1808 struct asic_fixed_properties *prop = &hdev->asic_prop;
1809 struct pci_mem_region *region;
1810
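/* Note: offsets inside the CFG BAR below are computed relative to
 * SPI_FLASH_BASE_ADDR, the start of that BAR's device address range.
 */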
1811 /* CFG */
1812 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1813 region->region_base = CFG_BASE;
1814 region->region_size = CFG_SIZE;
1815 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1816 region->bar_size = CFG_BAR_SIZE;
1817 region->bar_id = CFG_BAR_ID;
1818 region->used = 1;
1819
1820 /* SRAM */
1821 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1822 region->region_base = SRAM_BASE_ADDR;
1823 region->region_size = SRAM_SIZE;
1824 region->offset_in_bar = 0;
1825 region->bar_size = SRAM_BAR_SIZE;
1826 region->bar_id = SRAM_BAR_ID;
1827 region->used = 1;
1828
1829 /* DRAM */
1830 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1831 region->region_base = DRAM_PHYS_BASE;
1832 region->region_size = hdev->asic_prop.dram_size;
1833 region->offset_in_bar = 0;
1834 region->bar_size = prop->dram_pci_bar_size;
1835 region->bar_id = HBM_BAR_ID;
1836 region->used = 1;
1837
1838 /* SP SRAM */
1839 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1840 region->region_base = PSOC_SCRATCHPAD_ADDR;
1841 region->region_size = PSOC_SCRATCHPAD_SIZE;
1842 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1843 region->bar_size = CFG_BAR_SIZE;
1844 region->bar_id = CFG_BAR_ID;
1845 region->used = 1;
1846 }
1847
1848 static int gaudi_sw_init(struct hl_device *hdev)
1849 {
1850 struct gaudi_device *gaudi;
1851 u32 i, event_id = 0;
1852 int rc;
1853
1854 /* Allocate device structure */
1855 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1856 if (!gaudi)
1857 return -ENOMEM;
1858
1859 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1860 if (gaudi_irq_map_table[i].valid) {
1861 if (event_id == GAUDI_EVENT_SIZE) {
1862 dev_err(hdev->dev,
1863 "Event array exceeds the limit of %u events\n",
1864 GAUDI_EVENT_SIZE);
1865 rc = -EINVAL;
1866 goto free_gaudi_device;
1867 }
1868
1869 gaudi->events[event_id++] =
1870 gaudi_irq_map_table[i].fc_id;
1871 }
1872 }
1873
1874 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1875
1876 hdev->asic_specific = gaudi;
1877
1878 /* Create DMA pool for small allocations */
1879 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1880 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1881 if (!hdev->dma_pool) {
1882 dev_err(hdev->dev, "failed to create DMA pool\n");
1883 rc = -ENOMEM;
1884 goto free_gaudi_device;
1885 }
1886
1887 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1888 if (rc)
1889 goto free_dma_pool;
1890
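/* Minimal allocation order of ilog2(32), i.e. the pool hands out memory
 * in 32-byte granules.
 */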
1891 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1892 if (!hdev->cpu_accessible_dma_pool) {
1893 dev_err(hdev->dev,
1894 "Failed to create CPU accessible DMA pool\n");
1895 rc = -ENOMEM;
1896 goto free_cpu_dma_mem;
1897 }
1898
1899 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1900 (uintptr_t) hdev->cpu_accessible_dma_mem,
1901 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1902 if (rc) {
1903 dev_err(hdev->dev,
1904 "Failed to add memory to CPU accessible DMA pool\n");
1905 rc = -EFAULT;
1906 goto free_cpu_accessible_dma_pool;
1907 }
1908
1909 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1910 if (rc)
1911 goto free_cpu_accessible_dma_pool;
1912
1913 spin_lock_init(&gaudi->hw_queues_lock);
1914
1915 hdev->supports_sync_stream = true;
1916 hdev->supports_coresight = true;
1917 hdev->supports_staged_submission = true;
1918 hdev->supports_wait_for_multi_cs = true;
1919
1920 hdev->asic_funcs->set_pci_memory_regions(hdev);
1921 hdev->stream_master_qid_arr =
1922 hdev->asic_funcs->get_stream_master_qid_arr();
1923 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1924
1925 return 0;
1926
1927 free_cpu_accessible_dma_pool:
1928 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1929 free_cpu_dma_mem:
1930 if (!hdev->asic_prop.fw_security_enabled)
1931 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1932 hdev->cpu_pci_msb_addr);
1933 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1934 hdev->cpu_accessible_dma_address);
1935 free_dma_pool:
1936 dma_pool_destroy(hdev->dma_pool);
1937 free_gaudi_device:
1938 kfree(gaudi);
1939 return rc;
1940 }
1941
1942 static int gaudi_sw_fini(struct hl_device *hdev)
1943 {
1944 struct gaudi_device *gaudi = hdev->asic_specific;
1945
1946 gaudi_free_internal_qmans_pq_mem(hdev);
1947
1948 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1949
1950 if (!hdev->asic_prop.fw_security_enabled)
1951 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1952 hdev->cpu_pci_msb_addr);
1953
1954 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1955 hdev->cpu_accessible_dma_address);
1956
1957 dma_pool_destroy(hdev->dma_pool);
1958
1959 kfree(gaudi);
1960
1961 return 0;
1962 }
1963
1964 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1965 {
1966 struct hl_device *hdev = arg;
1967 int i;
1968
1969 if (hdev->disabled)
1970 return IRQ_HANDLED;
1971
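/* A single MSI serves all completion queues and the event queue, so
 * check all of them.
 */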
1972 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1973 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1974
1975 hl_irq_handler_eq(irq, &hdev->event_queue);
1976
1977 return IRQ_HANDLED;
1978 }
1979
1980 /*
1981 * For backward compatibility, new MSI interrupts should be set after the
1982 * existing CPU and NIC interrupts.
1983 */
1984 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1985 bool cpu_eq)
1986 {
1987 int msi_vec;
1988
1989 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1990 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1991 GAUDI_EVENT_QUEUE_MSI_IDX);
1992
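/* Vectors below the CPU EQ index (and the CPU EQ itself) map 1:1 to the
 * queue number; any later vector is placed after the CPU EQ and the
 * per-NIC interrupts, hence the extra offset.
 */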
1993 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1994 (nr + NIC_NUMBER_OF_ENGINES + 1);
1995
1996 return pci_irq_vector(hdev->pdev, msi_vec);
1997 }
1998
1999 static int gaudi_enable_msi_single(struct hl_device *hdev)
2000 {
2001 int rc, irq;
2002
2003 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2004
2005 irq = gaudi_pci_irq_vector(hdev, 0, false);
2006 rc = request_irq(irq, gaudi_irq_handler_single, 0,
2007 "gaudi single msi", hdev);
2008 if (rc)
2009 dev_err(hdev->dev,
2010 "Failed to request single MSI IRQ\n");
2011
2012 return rc;
2013 }
2014
2015 static int gaudi_enable_msi(struct hl_device *hdev)
2016 {
2017 struct gaudi_device *gaudi = hdev->asic_specific;
2018 int rc;
2019
2020 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2021 return 0;
2022
2023 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2024 if (rc < 0) {
2025 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2026 return rc;
2027 }
2028
2029 rc = gaudi_enable_msi_single(hdev);
2030 if (rc)
2031 goto free_pci_irq_vectors;
2032
2033 gaudi->hw_cap_initialized |= HW_CAP_MSI;
2034
2035 return 0;
2036
2037 free_pci_irq_vectors:
2038 pci_free_irq_vectors(hdev->pdev);
2039 return rc;
2040 }
2041
2042 static void gaudi_sync_irqs(struct hl_device *hdev)
2043 {
2044 struct gaudi_device *gaudi = hdev->asic_specific;
2045
2046 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2047 return;
2048
2049 /* Wait for all pending IRQs to be finished */
2050 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2051 }
2052
2053 static void gaudi_disable_msi(struct hl_device *hdev)
2054 {
2055 struct gaudi_device *gaudi = hdev->asic_specific;
2056
2057 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2058 return;
2059
2060 gaudi_sync_irqs(hdev);
2061 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2062 pci_free_irq_vectors(hdev->pdev);
2063
2064 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2065 }
2066
2067 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2068 {
2069 struct gaudi_device *gaudi = hdev->asic_specific;
2070
2071 if (hdev->asic_prop.fw_security_enabled)
2072 return;
2073
2074 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2075 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2076 return;
2077
2078 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2079 return;
2080
2081 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2082 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2083 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2084 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2085 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2086 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2087 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2088 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2089 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2090 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2091 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2092 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2093 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2094 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2095 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2096 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2097
2098 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2099 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2100 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2101 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2102 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2103 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2104 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2105 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2106 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2107 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2108 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2109 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2110 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2111 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2112 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2113 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2114
2115 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2116 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2117 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2118 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2119 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2120 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2121 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2122 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2123 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2124 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2125 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2126 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2127 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2128 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2129 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2130 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2131
2132 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2133 }
2134
2135 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2136 {
2137 struct gaudi_device *gaudi = hdev->asic_specific;
2138
2139 if (hdev->asic_prop.fw_security_enabled)
2140 return;
2141
2142 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2143 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2144 return;
2145
2146 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2147 return;
2148
2149 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2150 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2151 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2152 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2153 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2154 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2155 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2156 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2157 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2158 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2159 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2160 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2161 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2162 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2163 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2164 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2165
2166 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2167 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2168 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2169 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2170 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2171 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2172 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2173 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2174 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2175 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2176 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2177 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2178 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2179 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2180 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2181 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2182
2183 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2184 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2185 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2186 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2187 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2188 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2189 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2190 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2191 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2192 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2193 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2194 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2195 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2196 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2197 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2198 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2199
2200 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2201 }
2202
2203 static void gaudi_init_e2e(struct hl_device *hdev)
2204 {
2205 if (hdev->asic_prop.fw_security_enabled)
2206 return;
2207
2208 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2209 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2210 return;
2211
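/* Program per-router end-to-end credits for HBM and PCI traffic. The
 * HBM write/read sizes are programmed as the tuned value divided by 8,
 * hence the '>> 3'.
 */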
2212 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2213 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2214 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2215 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2216
2217 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2218 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2219 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2220 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2221
2222 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2223 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2224 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2225 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2226
2227 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2228 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2229 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2230 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2231
2232 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2233 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2234 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2235 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2236
2237 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2238 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2239 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2240 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2241
2242 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2243 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2244 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2245 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2246
2247 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2248 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2249 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2250 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2251
2252 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2253 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2254 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2255 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2256
2257 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2258 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2259 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2260 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2261
2262 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2263 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2264 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2265 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2266
2267 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2268 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2269 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2270 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2271
2272 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2273 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2274 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2275 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2276
2277 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2278 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2279 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2280 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2281
2282 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2283 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2284 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2285 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2286
2287 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2288 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2289 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2290 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2291
2292 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2293 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2294 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2295 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2296
2297 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2298 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2299 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2300 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2301
2302 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2303 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2304 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2305 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2306
2307 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2308 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2309 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2310 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2311
2312 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2313 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2314 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2315 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2316
2317 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2318 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2319 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2320 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2321
2322 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2323 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2324 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2325 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2326
2327 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2328 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2329 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2330 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2331
2332 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2333 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2334 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2335 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2336
2337 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2338 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2339 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2340 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2341
2342 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2343 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2344 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2345 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2346
2347 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2348 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2349 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2350 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2351
2352 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2353 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2354 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2355 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2356
2357 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2358 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2359 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2360 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2361
2362 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2363 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2364 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2365 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2366
2367 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2368 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2369 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2370 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2371
2372 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2373 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2374 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2375 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2376
2377 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2378 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2379 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2380 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2381
2382 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2383 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2384 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2385 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2386
2387 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2388 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2389 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2390 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2391
2392 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2393 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2394 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2395 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2396
2397 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2398 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2399 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2400 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2401
2402 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2403 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2404 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2405 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2406
2407 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2408 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2409 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2410 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2411
2412 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2413 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2414 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2415 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2416
2417 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2418 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2419 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2420 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2421
2422 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2423 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2424 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2425 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2426
2427 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2428 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2429 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2430 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2431
2432 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2433 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2434 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2435 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2436
2437 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2438 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2439 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2440 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2441
2442 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2443 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2444 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2445 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2446
2447 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2448 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2449 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2450 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2451 }
2452
2453 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2454 {
2455 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2456
2457 if (hdev->asic_prop.fw_security_enabled)
2458 return;
2459
2460 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2461 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2462 return;
2463
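/* Default per-DMA_IF read/write credit counts toward HBM0/HBM1, applied
 * only when the F/W has not already configured the HBM credits.
 */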
2464 hbm0_wr = 0x33333333;
2465 hbm0_rd = 0x77777777;
2466 hbm1_wr = 0x55555555;
2467 hbm1_rd = 0xDDDDDDDD;
2468
2469 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2470 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2471 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2472 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2473
2474 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2475 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2476 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2477 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2478
2479 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2480 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2481 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2482 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2483
2484 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2485 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2486 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2487 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2488
2489 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2490 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2491 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2492 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2493 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2494 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2495 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2496 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2497 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2498 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2499 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2500 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2501
2502 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2503 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2504 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2505 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2506 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2507 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2508 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2509 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2510 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2511 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2512 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2513 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2514 }
2515
2516 static void gaudi_init_golden_registers(struct hl_device *hdev)
2517 {
2518 u32 tpc_offset;
2519 int tpc_id, i;
2520
2521 gaudi_init_e2e(hdev);
2522 gaudi_init_hbm_cred(hdev);
2523
2524 for (tpc_id = 0, tpc_offset = 0;
2525 tpc_id < TPC_NUMBER_OF_ENGINES;
2526 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2527 /* Mask all arithmetic interrupts from TPC */
2528 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2529 /* Set 16 cache lines */
2530 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2531 ICACHE_FETCH_LINE_NUM, 2);
2532 }
2533
2534 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2535 for (i = 0 ; i < 128 ; i += 8)
2536 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2537
2538 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2539 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542 }
2543
2544 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2545 int qman_id, dma_addr_t qman_pq_addr)
2546 {
2547 struct cpu_dyn_regs *dyn_regs =
2548 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2549 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2550 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2551 u32 q_off, dma_qm_offset;
2552 u32 dma_qm_err_cfg, irq_handler_offset;
2553
2554 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2555
2556 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2557 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2558 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2559 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2560 so_base_en_lo = lower_32_bits(CFG_BASE +
2561 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2562 so_base_en_hi = upper_32_bits(CFG_BASE +
2563 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2564 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2565 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2566 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2567 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2568 so_base_ws_lo = lower_32_bits(CFG_BASE +
2569 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2570 so_base_ws_hi = upper_32_bits(CFG_BASE +
2571 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2572
2573 q_off = dma_qm_offset + qman_id * 4;
2574
2575 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2576 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2577
2578 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2579 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2580 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2581
2582 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2583 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2584 QMAN_LDMA_SRC_OFFSET);
2585 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2586 QMAN_LDMA_DST_OFFSET);
2587
2588 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2589 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2590 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2591 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2592 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2593 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2594 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2595 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2596
2597 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2598
2599 /* The following configuration is needed only once per QMAN */
2600 if (qman_id == 0) {
2601 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2602 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2603 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2604
2605 /* Configure RAZWI IRQ */
2606 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2607 if (hdev->stop_on_err)
2608 dma_qm_err_cfg |=
2609 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2610
2611 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2612
2613 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2614 lower_32_bits(CFG_BASE + irq_handler_offset));
2615 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2616 upper_32_bits(CFG_BASE + irq_handler_offset));
2617
2618 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2619 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2620 dma_id);
2621
2622 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2623 QM_ARB_ERR_MSG_EN_MASK);
2624
2625 /* Set timeout to maximum */
2626 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2627
2628 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2629 QMAN_EXTERNAL_MAKE_TRUSTED);
2630
2631 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2632 }
2633 }
2634
2635 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2636 {
2637 struct cpu_dyn_regs *dyn_regs =
2638 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2639 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2640 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2641 u32 irq_handler_offset;
2642
2643 /* Set to maximum possible according to physical size */
2644 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2645 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2646
2647 /* WA for H/W bug H3-2116 */
2648 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2649
2650 /* The STOP_ON bit implies no completion of the operation in case of RAZWI */
2651 if (hdev->stop_on_err)
2652 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2653
2654 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2655
2656 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2657 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2658 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2659
2660 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2661 lower_32_bits(CFG_BASE + irq_handler_offset));
2662 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2663 upper_32_bits(CFG_BASE + irq_handler_offset));
2664
2665 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2666 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2667 WREG32(mmDMA0_CORE_PROT + dma_offset,
2668 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2669 /* If the channel is secured, it should be in MMU bypass mode */
2670 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2671 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2672 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2673 }
2674
2675 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2676 u32 enable_mask)
2677 {
2678 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2679
2680 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2681 }
2682
2683 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2684 {
2685 struct gaudi_device *gaudi = hdev->asic_specific;
2686 struct hl_hw_queue *q;
2687 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2688
2689 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2690 return;
2691
2692 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2693 dma_id = gaudi_dma_assignment[i];
2694 /*
2695 * For queues after the CPU Q, add 1 to get the correct queue
2696 * index. In addition, the CPU EQ and NIC IRQs must be added in
2697 * order to get the correct MSI register.
2698 */
2699 if (dma_id > 1) {
2700 cpu_skip = 1;
2701 nic_skip = NIC_NUMBER_OF_ENGINES;
2702 } else {
2703 cpu_skip = 0;
2704 nic_skip = 0;
2705 }
2706
2707 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2708 q_idx = 4 * dma_id + j + cpu_skip;
2709 q = &hdev->kernel_queues[q_idx];
2710 q->cq_id = cq_id++;
2711 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2712 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2713 q->bus_address);
2714 }
2715
2716 gaudi_init_dma_core(hdev, dma_id);
2717
2718 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2719 }
2720
2721 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2722 }
2723
2724 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2725 int qman_id, u64 qman_base_addr)
2726 {
2727 struct cpu_dyn_regs *dyn_regs =
2728 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2729 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2730 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2731 u32 dma_qm_err_cfg, irq_handler_offset;
2732 u32 q_off, dma_qm_offset;
2733
2734 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2735
2736 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2737 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2738 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2739 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2740 so_base_en_lo = lower_32_bits(CFG_BASE +
2741 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2742 so_base_en_hi = upper_32_bits(CFG_BASE +
2743 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2744 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2745 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2746 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2747 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2748 so_base_ws_lo = lower_32_bits(CFG_BASE +
2749 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2750 so_base_ws_hi = upper_32_bits(CFG_BASE +
2751 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2752
2753 q_off = dma_qm_offset + qman_id * 4;
2754
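/* Streams 0-3 are the upper CPs and get a PQ. qman_id 4 is the lower CP,
 * which has no PQ and carries the per-QMAN error and arbitration
 * configuration instead.
 */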
2755 if (qman_id < 4) {
2756 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2757 lower_32_bits(qman_base_addr));
2758 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2759 upper_32_bits(qman_base_addr));
2760
2761 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2762 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2763 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2764
2765 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2766 QMAN_CPDMA_SIZE_OFFSET);
2767 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2768 QMAN_CPDMA_SRC_OFFSET);
2769 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2770 QMAN_CPDMA_DST_OFFSET);
2771 } else {
2772 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2773 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2774 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2775
2776 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2777 QMAN_LDMA_SIZE_OFFSET);
2778 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2779 QMAN_LDMA_SRC_OFFSET);
2780 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2781 QMAN_LDMA_DST_OFFSET);
2782
2783 /* Configure RAZWI IRQ */
2784 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2785 if (hdev->stop_on_err)
2786 dma_qm_err_cfg |=
2787 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2788
2789 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2790
2791 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2792 lower_32_bits(CFG_BASE + irq_handler_offset));
2793 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2794 upper_32_bits(CFG_BASE + irq_handler_offset));
2795
2796 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2797 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2798 dma_id);
2799
2800 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2801 QM_ARB_ERR_MSG_EN_MASK);
2802
2803 /* Set timeout to maximum */
2804 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2805
2806 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2807 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2808 QMAN_INTERNAL_MAKE_TRUSTED);
2809 }
2810
2811 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2812 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2813 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2814 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2815
2816 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2817 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2818 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2819 mtr_base_ws_lo);
2820 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2821 mtr_base_ws_hi);
2822 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2823 so_base_ws_lo);
2824 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2825 so_base_ws_hi);
2826 }
2827 }
2828
2829 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2830 {
2831 struct gaudi_device *gaudi = hdev->asic_specific;
2832 struct gaudi_internal_qman_info *q;
2833 u64 qman_base_addr;
2834 int i, j, dma_id, internal_q_index;
2835
2836 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2837 return;
2838
2839 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2840 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2841
2842 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2843 /*
2844 * Add the CPU queue in order to get the correct queue
2845 * number, as all internal queues are placed after it
2846 */
2847 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2848
2849 q = &gaudi->internal_qmans[internal_q_index];
2850 qman_base_addr = (u64) q->pq_dma_addr;
2851 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2852 qman_base_addr);
2853 }
2854
2855 /* Initializing lower CP for HBM DMA QMAN */
2856 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2857
2858 gaudi_init_dma_core(hdev, dma_id);
2859
2860 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2861 }
2862
2863 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2864 }
2865
2866 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2867 int qman_id, u64 qman_base_addr)
2868 {
2869 struct cpu_dyn_regs *dyn_regs =
2870 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2871 u32 mtr_base_lo, mtr_base_hi;
2872 u32 so_base_lo, so_base_hi;
2873 u32 irq_handler_offset;
2874 u32 q_off, mme_id;
2875 u32 mme_qm_err_cfg;
2876
2877 mtr_base_lo = lower_32_bits(CFG_BASE +
2878 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2879 mtr_base_hi = upper_32_bits(CFG_BASE +
2880 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2881 so_base_lo = lower_32_bits(CFG_BASE +
2882 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2883 so_base_hi = upper_32_bits(CFG_BASE +
2884 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2885
2886 q_off = mme_offset + qman_id * 4;
2887
2888 if (qman_id < 4) {
2889 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2890 lower_32_bits(qman_base_addr));
2891 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2892 upper_32_bits(qman_base_addr));
2893
2894 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2895 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2896 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2897
2898 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2899 QMAN_CPDMA_SIZE_OFFSET);
2900 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2901 QMAN_CPDMA_SRC_OFFSET);
2902 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2903 QMAN_CPDMA_DST_OFFSET);
2904 } else {
2905 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2906 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2907 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2908
2909 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2910 QMAN_LDMA_SIZE_OFFSET);
2911 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2912 QMAN_LDMA_SRC_OFFSET);
2913 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2914 QMAN_LDMA_DST_OFFSET);
2915
2916 /* Configure RAZWI IRQ */
2917 mme_id = mme_offset /
2918 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2919
2920 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2921 if (hdev->stop_on_err)
2922 mme_qm_err_cfg |=
2923 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2924
2925 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2926
2927 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2928 lower_32_bits(CFG_BASE + irq_handler_offset));
2929 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2930 upper_32_bits(CFG_BASE + irq_handler_offset));
2931
2932 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2933 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2934 mme_id);
2935
2936 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2937 QM_ARB_ERR_MSG_EN_MASK);
2938
2939 /* Set timeout to maximum */
2940 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2941
2942 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2943 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2944 QMAN_INTERNAL_MAKE_TRUSTED);
2945 }
2946
2947 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2948 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2949 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2950 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2951 }
2952
2953 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2954 {
2955 struct gaudi_device *gaudi = hdev->asic_specific;
2956 struct gaudi_internal_qman_info *q;
2957 u64 qman_base_addr;
2958 u32 mme_offset;
2959 int i, internal_q_index;
2960
2961 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2962 return;
2963
2964 /*
2965 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2966 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2967 */
2968
2969 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2970
2971 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2972 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2973 q = &gaudi->internal_qmans[internal_q_index];
2974 qman_base_addr = (u64) q->pq_dma_addr;
2975 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2976 qman_base_addr);
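/* The first 4 streams use MME2 (the N_W MME); afterwards switch the
 * offset to MME0 (the S_W MME) for the remaining streams.
 */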
2977 if (i == 3)
2978 mme_offset = 0;
2979 }
2980
2981 /* Initializing lower CP for MME QMANs */
2982 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2983 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2984 gaudi_init_mme_qman(hdev, 0, 4, 0);
2985
2986 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2987 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988
2989 gaudi->hw_cap_initialized |= HW_CAP_MME;
2990 }
2991
2992 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2993 int qman_id, u64 qman_base_addr)
2994 {
2995 struct cpu_dyn_regs *dyn_regs =
2996 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2997 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2998 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2999 u32 tpc_qm_err_cfg, irq_handler_offset;
3000 u32 q_off, tpc_id;
3001
3002 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3003 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3004 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3005 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3006 so_base_en_lo = lower_32_bits(CFG_BASE +
3007 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3008 so_base_en_hi = upper_32_bits(CFG_BASE +
3009 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3010 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3011 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3012 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3013 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3014 so_base_ws_lo = lower_32_bits(CFG_BASE +
3015 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3016 so_base_ws_hi = upper_32_bits(CFG_BASE +
3017 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3018
3019 q_off = tpc_offset + qman_id * 4;
3020
3021 tpc_id = tpc_offset /
3022 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3023
3024 if (qman_id < 4) {
3025 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3026 lower_32_bits(qman_base_addr));
3027 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3028 upper_32_bits(qman_base_addr));
3029
3030 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3031 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3032 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3033
3034 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3035 QMAN_CPDMA_SIZE_OFFSET);
3036 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3037 QMAN_CPDMA_SRC_OFFSET);
3038 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3039 QMAN_CPDMA_DST_OFFSET);
3040 } else {
3041 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3042 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3043 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3044
3045 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3046 QMAN_LDMA_SIZE_OFFSET);
3047 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3048 QMAN_LDMA_SRC_OFFSET);
3049 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3050 QMAN_LDMA_DST_OFFSET);
3051
3052 /* Configure RAZWI IRQ */
3053 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3054 if (hdev->stop_on_err)
3055 tpc_qm_err_cfg |=
3056 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3057
3058 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3059
3060 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3061 lower_32_bits(CFG_BASE + irq_handler_offset));
3062 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3063 upper_32_bits(CFG_BASE + irq_handler_offset));
3064
3065 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3066 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3067 tpc_id);
3068
3069 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3070 QM_ARB_ERR_MSG_EN_MASK);
3071
3072 /* Set timeout to maximum */
3073 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3074
3075 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3076 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3077 QMAN_INTERNAL_MAKE_TRUSTED);
3078 }
3079
3080 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3081 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3082 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3083 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3084
3085 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3086 if (tpc_id == 6) {
3087 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3088 mtr_base_ws_lo);
3089 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3090 mtr_base_ws_hi);
3091 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3092 so_base_ws_lo);
3093 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3094 so_base_ws_hi);
3095 }
3096 }
3097
3098 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3099 {
3100 struct gaudi_device *gaudi = hdev->asic_specific;
3101 struct gaudi_internal_qman_info *q;
3102 u64 qman_base_addr;
3103 u32 so_base_hi, tpc_offset = 0;
3104 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3105 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3106 int i, tpc_id, internal_q_index;
3107
3108 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3109 return;
3110
3111 so_base_hi = upper_32_bits(CFG_BASE +
3112 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3113
3114 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3115 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3116 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3117 tpc_id * QMAN_STREAMS + i;
3118 q = &gaudi->internal_qmans[internal_q_index];
3119 qman_base_addr = (u64) q->pq_dma_addr;
3120 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3121 qman_base_addr);
3122
3123 if (i == 3) {
3124 /* Initializing lower CP for TPC QMAN */
3125 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3126
3127 /* Enable the QMAN and TPC channel */
3128 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3129 QMAN_TPC_ENABLE);
3130 }
3131 }
3132
3133 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3134 so_base_hi);
3135
3136 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3137
3138 gaudi->hw_cap_initialized |=
3139 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3140 }
3141 }
3142
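/*
 * Configure a single NIC QMAN stream: program the PQ base/size, reset its
 * PI/CI, set the CP LDMA offsets, and point CP_MSG_BASE 0/1 at the
 * east-north sync manager and CP_MSG_BASE 2/3 at the west-south sync
 * manager for the sync stream collective. For stream 0 only, RAZWI and
 * arbitration error reporting plus the arbiter watchdog are configured.
 */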
3143 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3144 int qman_id, u64 qman_base_addr, int nic_id)
3145 {
3146 struct cpu_dyn_regs *dyn_regs =
3147 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3148 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3149 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3150 u32 nic_qm_err_cfg, irq_handler_offset;
3151 u32 q_off;
3152
3153 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3154 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3155 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3156 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3157 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3158 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3159 so_base_en_hi = upper_32_bits(CFG_BASE +
3160 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3161 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3162 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3163 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3164 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3165 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3166 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3167 so_base_ws_hi = upper_32_bits(CFG_BASE +
3168 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3169
3170 q_off = nic_offset + qman_id * 4;
3171
3172 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3173 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3174
3175 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3176 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3177 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3178
3179 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3180 QMAN_LDMA_SIZE_OFFSET);
3181 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3182 QMAN_LDMA_SRC_OFFSET);
3183 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3184 QMAN_LDMA_DST_OFFSET);
3185
3186 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3187 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3188 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3189 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3190
3191 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3192 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3193 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3194 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3195 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3196
3197 if (qman_id == 0) {
3198 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3199 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3200 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3201
3202 /* Configure RAZWI IRQ */
3203 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3204 if (hdev->stop_on_err)
3205 nic_qm_err_cfg |=
3206 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3207
3208 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3209
3210 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3211 lower_32_bits(CFG_BASE + irq_handler_offset));
3212 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3213 upper_32_bits(CFG_BASE + irq_handler_offset));
3214
3215 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3216 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3217 nic_id);
3218
3219 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3220 QM_ARB_ERR_MSG_EN_MASK);
3221
3222 /* Set timeout to maximum */
3223 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3224
3225 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3226 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3227 QMAN_INTERNAL_MAKE_TRUSTED);
3228 }
3229 }
3230
3231 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3232 {
3233 struct gaudi_device *gaudi = hdev->asic_specific;
3234 struct gaudi_internal_qman_info *q;
3235 u64 qman_base_addr;
3236 u32 nic_offset = 0;
3237 u32 nic_delta_between_qmans =
3238 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3239 u32 nic_delta_between_nics =
3240 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3241 int i, nic_id, internal_q_index;
3242
3243 if (!hdev->nic_ports_mask)
3244 return;
3245
3246 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3247 return;
3248
3249 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3250
3251 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3252 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3253 nic_offset += nic_delta_between_qmans;
3254 if (nic_id & 1) {
3255 nic_offset -= (nic_delta_between_qmans * 2);
3256 nic_offset += nic_delta_between_nics;
3257 }
3258 continue;
3259 }
3260
3261 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3262 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3263 nic_id * QMAN_STREAMS + i;
3264 q = &gaudi->internal_qmans[internal_q_index];
3265 qman_base_addr = (u64) q->pq_dma_addr;
3266 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3267 qman_base_addr, nic_id);
3268 }
3269
3270 /* Enable the QMAN */
3271 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3272
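/*
 * Each NIC macro hosts two QMANs. Advance to the next QMAN block, and
 * after the odd-numbered engine step to the base of the next NIC macro
 * instead.
 */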
3273 nic_offset += nic_delta_between_qmans;
3274 if (nic_id & 1) {
3275 nic_offset -= (nic_delta_between_qmans * 2);
3276 nic_offset += nic_delta_between_nics;
3277 }
3278
3279 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3280 }
3281 }
3282
3283 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3284 {
3285 struct gaudi_device *gaudi = hdev->asic_specific;
3286
3287 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3288 return;
3289
3290 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3291 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3292 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3293 }
3294
3295 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3296 {
3297 struct gaudi_device *gaudi = hdev->asic_specific;
3298
3299 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3300 return;
3301
3302 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3303 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3304 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3305 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3306 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3307 }
3308
3309 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3310 {
3311 struct gaudi_device *gaudi = hdev->asic_specific;
3312
3313 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3314 return;
3315
3316 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3317 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3318 }
3319
3320 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3321 {
3322 struct gaudi_device *gaudi = hdev->asic_specific;
3323 u32 tpc_offset = 0;
3324 int tpc_id;
3325
3326 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3327 return;
3328
3329 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3330 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3331 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3332 }
3333 }
3334
3335 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3336 {
3337 struct gaudi_device *gaudi = hdev->asic_specific;
3338 u32 nic_mask, nic_offset = 0;
3339 u32 nic_delta_between_qmans =
3340 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3341 u32 nic_delta_between_nics =
3342 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3343 int nic_id;
3344
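/* Disable only the QMANs of NIC engines that were actually initialized */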
3345 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3346 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3347
3348 if (gaudi->hw_cap_initialized & nic_mask)
3349 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3350
3351 nic_offset += nic_delta_between_qmans;
3352 if (nic_id & 1) {
3353 nic_offset -= (nic_delta_between_qmans * 2);
3354 nic_offset += nic_delta_between_nics;
3355 }
3356 }
3357 }
3358
3359 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3360 {
3361 struct gaudi_device *gaudi = hdev->asic_specific;
3362
3363 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3364 return;
3365
3366 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3367 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3368 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3369 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3370 }
3371
3372 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3373 {
3374 struct gaudi_device *gaudi = hdev->asic_specific;
3375
3376 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3377 return;
3378
3379 /* Stop CPs of HBM DMA QMANs */
3380
3381 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3382 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3383 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3384 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 }
3387
3388 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3389 {
3390 struct gaudi_device *gaudi = hdev->asic_specific;
3391
3392 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3393 return;
3394
3395 /* Stop CPs of MME QMANs */
3396 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3397 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3398 }
3399
3400 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3401 {
3402 struct gaudi_device *gaudi = hdev->asic_specific;
3403
3404 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3405 return;
3406
3407 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3408 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3409 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3410 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 }
3416
3417 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3418 {
3419 struct gaudi_device *gaudi = hdev->asic_specific;
3420
3421 /* Stop upper CPs of QMANs */
3422
3423 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3424 WREG32(mmNIC0_QM0_GLBL_CFG1,
3425 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3426 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3427 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3428
3429 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3430 WREG32(mmNIC0_QM1_GLBL_CFG1,
3431 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3432 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3433 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3434
3435 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3436 WREG32(mmNIC1_QM0_GLBL_CFG1,
3437 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3438 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3439 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3440
3441 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3442 WREG32(mmNIC1_QM1_GLBL_CFG1,
3443 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3444 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3445 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3446
3447 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3448 WREG32(mmNIC2_QM0_GLBL_CFG1,
3449 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3450 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3451 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3452
3453 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3454 WREG32(mmNIC2_QM1_GLBL_CFG1,
3455 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3456 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3457 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3458
3459 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3460 WREG32(mmNIC3_QM0_GLBL_CFG1,
3461 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3462 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3463 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3464
3465 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3466 WREG32(mmNIC3_QM1_GLBL_CFG1,
3467 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3468 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3469 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3470
3471 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3472 WREG32(mmNIC4_QM0_GLBL_CFG1,
3473 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3474 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3475 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3476
3477 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3478 WREG32(mmNIC4_QM1_GLBL_CFG1,
3479 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3480 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3481 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3482 }
3483
3484 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3485 {
3486 struct gaudi_device *gaudi = hdev->asic_specific;
3487
3488 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3489 return;
3490
3491 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3492 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3493 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3494 }
3495
3496 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3497 {
3498 struct gaudi_device *gaudi = hdev->asic_specific;
3499
3500 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3501 return;
3502
3503 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3504 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3505 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3506 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 }
3509
3510 static void gaudi_mme_stall(struct hl_device *hdev)
3511 {
3512 struct gaudi_device *gaudi = hdev->asic_specific;
3513
3514 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3515 return;
3516
3517 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3518 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3519 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3520 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3521 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3522 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534 }
3535
3536 static void gaudi_tpc_stall(struct hl_device *hdev)
3537 {
3538 struct gaudi_device *gaudi = hdev->asic_specific;
3539
3540 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3541 return;
3542
3543 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3544 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3545 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 }
3552
3553 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3554 {
3555 u32 qman_offset;
3556 int i;
3557
3558 if (hdev->asic_prop.fw_security_enabled)
3559 return;
3560
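/* Clear the clock-gating (CGM) configuration of all DMA, MME and TPC QMANs */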
3561 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3562 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3563 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3564
3565 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3566 }
3567
3568 WREG32(mmMME0_QM_CGM_CFG, 0);
3569 WREG32(mmMME0_QM_CGM_CFG1, 0);
3570 WREG32(mmMME2_QM_CGM_CFG, 0);
3571 WREG32(mmMME2_QM_CGM_CFG1, 0);
3572
3573 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3574 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3575 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3576
3577 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3578 }
3579 }
3580
3581 static void gaudi_enable_timestamp(struct hl_device *hdev)
3582 {
3583 /* Disable the timestamp counter */
3584 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3585
3586 /* Zero the lower/upper parts of the 64-bit counter */
3587 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3588 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3589
3590 /* Enable the counter */
3591 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3592 }
3593
3594 static void gaudi_disable_timestamp(struct hl_device *hdev)
3595 {
3596 /* Disable the timestamp counter */
3597 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3598 }
3599
3600 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3601 {
3602 u32 wait_timeout_ms;
3603
3604 if (hdev->pldm)
3605 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3606 else
3607 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3608
3609 if (fw_reset)
3610 goto skip_engines;
3611
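/*
 * Quiesce the engines in stages: first stop the QMAN command parsers,
 * then stall the engine cores themselves, and finally disable the QMANs,
 * with a wait between each stage.
 */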
3612 gaudi_stop_nic_qmans(hdev);
3613 gaudi_stop_mme_qmans(hdev);
3614 gaudi_stop_tpc_qmans(hdev);
3615 gaudi_stop_hbm_dma_qmans(hdev);
3616 gaudi_stop_pci_dma_qmans(hdev);
3617
3618 msleep(wait_timeout_ms);
3619
3620 gaudi_pci_dma_stall(hdev);
3621 gaudi_hbm_dma_stall(hdev);
3622 gaudi_tpc_stall(hdev);
3623 gaudi_mme_stall(hdev);
3624
3625 msleep(wait_timeout_ms);
3626
3627 gaudi_disable_nic_qmans(hdev);
3628 gaudi_disable_mme_qmans(hdev);
3629 gaudi_disable_tpc_qmans(hdev);
3630 gaudi_disable_hbm_dma_qmans(hdev);
3631 gaudi_disable_pci_dma_qmans(hdev);
3632
3633 gaudi_disable_timestamp(hdev);
3634
3635 skip_engines:
3636 gaudi_disable_msi(hdev);
3637 }
3638
3639 static int gaudi_mmu_init(struct hl_device *hdev)
3640 {
3641 struct asic_fixed_properties *prop = &hdev->asic_prop;
3642 struct gaudi_device *gaudi = hdev->asic_specific;
3643 u64 hop0_addr;
3644 int rc, i;
3645
3646 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3647 return 0;
3648
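/* Program the hop0 page-table address for every supported ASID */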
3649 for (i = 0 ; i < prop->max_asid ; i++) {
3650 hop0_addr = prop->mmu_pgt_addr +
3651 (i * prop->mmu_hop_table_size);
3652
3653 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3654 if (rc) {
3655 dev_err(hdev->dev,
3656 "failed to set hop0 addr for asid %d\n", i);
3657 return rc;
3658 }
3659 }
3660
3661 /* init MMU cache manage page */
3662 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3663 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3664
3665 /* mem cache invalidation */
3666 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3667
3668 rc = hl_mmu_invalidate_cache(hdev, true, 0);
3669 if (rc)
3670 return rc;
3671
3672 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3673 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3674
3675 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3676
3677 /*
3678 * The H/W expects the first PI after init to be 1. After wraparound
3679 * we'll write 0.
3680 */
3681 gaudi->mmu_cache_inv_pi = 1;
3682
3683 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3684
3685 return 0;
3686 }
3687
3688 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3689 {
3690 void __iomem *dst;
3691
3692 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3693
3694 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3695 }
3696
3697 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3698 {
3699 void __iomem *dst;
3700
3701 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3702
3703 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3704 }
3705
3706 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3707 {
3708 struct dynamic_fw_load_mgr *dynamic_loader;
3709 struct cpu_dyn_regs *dyn_regs;
3710
3711 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3712
3713 /*
3714 * Here we set initial values for a few specific dynamic regs (before
3715 * reading the first descriptor from the FW, these values have to be
3716 * hard-coded). In later stages of the protocol they are updated
3717 * automatically by reading the FW descriptor, so the data there is
3718 * always up-to-date.
3719 */
3720 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3721 dyn_regs->kmd_msg_to_cpu =
3722 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3723 dyn_regs->cpu_cmd_status_to_host =
3724 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3725
3726 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3727 }
3728
3729 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3730 {
3731 struct static_fw_load_mgr *static_loader;
3732
3733 static_loader = &hdev->fw_loader.static_loader;
3734
3735 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3736 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3737 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3738 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3739 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3740 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3741 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3742 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3743 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3744 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3745 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3746 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3747 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3748 GAUDI_PLDM_RESET_WAIT_MSEC :
3749 GAUDI_CPU_RESET_WAIT_MSEC;
3750 }
3751
3752 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3753 {
3754 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3755
3756 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3757 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3758 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3759 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3760 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3761 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3762 }
3763
3764 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3765 {
3766 struct asic_fixed_properties *prop = &hdev->asic_prop;
3767 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3768
3769 /* fill common fields */
3770 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3771 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3772 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3773 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3774 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3775 fw_loader->skip_bmc = !hdev->bmc_enable;
3776 fw_loader->sram_bar_id = SRAM_BAR_ID;
3777 fw_loader->dram_bar_id = HBM_BAR_ID;
3778
3779 if (prop->dynamic_fw_load)
3780 gaudi_init_dynamic_firmware_loader(hdev);
3781 else
3782 gaudi_init_static_firmware_loader(hdev);
3783 }
3784
3785 static int gaudi_init_cpu(struct hl_device *hdev)
3786 {
3787 struct gaudi_device *gaudi = hdev->asic_specific;
3788 int rc;
3789
3790 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3791 return 0;
3792
3793 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3794 return 0;
3795
3796 /*
3797 * The device CPU works with 40 bits addresses.
3798 * This register sets the extension to 50 bits.
3799 */
3800 if (!hdev->asic_prop.fw_security_enabled)
3801 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3802
3803 rc = hl_fw_init_cpu(hdev);
3804
3805 if (rc)
3806 return rc;
3807
3808 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3809
3810 return 0;
3811 }
3812
3813 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3814 {
3815 struct cpu_dyn_regs *dyn_regs =
3816 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3817 struct asic_fixed_properties *prop = &hdev->asic_prop;
3818 struct gaudi_device *gaudi = hdev->asic_specific;
3819 u32 status, irq_handler_offset;
3820 struct hl_eq *eq;
3821 struct hl_hw_queue *cpu_pq =
3822 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3823 int err;
3824
3825 if (!hdev->cpu_queues_enable)
3826 return 0;
3827
3828 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3829 return 0;
3830
3831 eq = &hdev->event_queue;
3832
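/* Tell the device CPU where the PQ, EQ and CQ buffers reside in host memory */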
3833 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3834 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3835
3836 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3837 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3838
3839 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3840 lower_32_bits(hdev->cpu_accessible_dma_address));
3841 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3842 upper_32_bits(hdev->cpu_accessible_dma_address));
3843
3844 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3845 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3846 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3847
3848 /* Used for EQ CI */
3849 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3850
3851 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3852
3853 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3854
3855 irq_handler_offset = prop->gic_interrupts_enable ?
3856 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3857 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3858
3859 WREG32(irq_handler_offset,
3860 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3861
3862 err = hl_poll_timeout(
3863 hdev,
3864 mmCPU_IF_QUEUE_INIT,
3865 status,
3866 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3867 1000,
3868 cpu_timeout);
3869
3870 if (err) {
3871 dev_err(hdev->dev,
3872 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3873 return -EIO;
3874 }
3875
3876 /* update FW application security bits */
3877 if (prop->fw_cpu_boot_dev_sts0_valid)
3878 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3879 if (prop->fw_cpu_boot_dev_sts1_valid)
3880 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3881
3882 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3883 return 0;
3884 }
3885
3886 static void gaudi_pre_hw_init(struct hl_device *hdev)
3887 {
3888 /* Perform read from the device to make sure device is up */
3889 RREG32(mmHW_STATE);
3890
3891 if (!hdev->asic_prop.fw_security_enabled) {
3892 /* Set the access through PCI bars (Linux driver only) as
3893 * secured
3894 */
3895 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3896 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3897 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3898
3899 /* Perform read to flush the waiting writes to ensure
3900 * configuration was set in the device
3901 */
3902 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3903 }
3904
3905 /*
3906 * Let's mark in the H/W that we have reached this point. We check
3907 * this value in the reset_before_init function to understand whether
3908 * we need to reset the chip before doing H/W init. This register is
3909 * cleared by the H/W upon H/W reset
3910 */
3911 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3912 }
3913
3914 static int gaudi_hw_init(struct hl_device *hdev)
3915 {
3916 struct gaudi_device *gaudi = hdev->asic_specific;
3917 int rc;
3918
3919 gaudi_pre_hw_init(hdev);
3920
3921 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3922 * So we set it here and if anyone tries to move it later to
3923 * a different address, there will be an error
3924 */
3925 if (hdev->asic_prop.iatu_done_by_fw)
3926 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3927
3928 /*
3929 * Before pushing u-boot/linux to device, need to set the hbm bar to
3930 * base address of dram
3931 */
3932 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3933 dev_err(hdev->dev,
3934 "failed to map HBM bar to DRAM base address\n");
3935 return -EIO;
3936 }
3937
3938 rc = gaudi_init_cpu(hdev);
3939 if (rc) {
3940 dev_err(hdev->dev, "failed to initialize CPU\n");
3941 return rc;
3942 }
3943
3944 /* In case the clock gating was enabled in preboot we need to disable
3945 * it here before touching the MME/TPC registers.
3946 */
3947 gaudi_disable_clock_gating(hdev);
3948
3949 /* SRAM scrambler must be initialized after CPU is running from HBM */
3950 gaudi_init_scrambler_sram(hdev);
3951
3952 /* This is here just in case we are working without CPU */
3953 gaudi_init_scrambler_hbm(hdev);
3954
3955 gaudi_init_golden_registers(hdev);
3956
3957 rc = gaudi_mmu_init(hdev);
3958 if (rc)
3959 return rc;
3960
3961 gaudi_init_security(hdev);
3962
3963 gaudi_init_pci_dma_qmans(hdev);
3964
3965 gaudi_init_hbm_dma_qmans(hdev);
3966
3967 gaudi_init_mme_qmans(hdev);
3968
3969 gaudi_init_tpc_qmans(hdev);
3970
3971 gaudi_init_nic_qmans(hdev);
3972
3973 gaudi_enable_timestamp(hdev);
3974
3975 /* MSI must be enabled before CPU queues and NIC are initialized */
3976 rc = gaudi_enable_msi(hdev);
3977 if (rc)
3978 goto disable_queues;
3979
3980 /* must be called after MSI was enabled */
3981 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3982 if (rc) {
3983 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3984 rc);
3985 goto disable_msi;
3986 }
3987
3988 /* Perform read from the device to flush all configuration */
3989 RREG32(mmHW_STATE);
3990
3991 return 0;
3992
3993 disable_msi:
3994 gaudi_disable_msi(hdev);
3995 disable_queues:
3996 gaudi_disable_mme_qmans(hdev);
3997 gaudi_disable_pci_dma_qmans(hdev);
3998
3999 return rc;
4000 }
4001
4002 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4003 {
4004 struct cpu_dyn_regs *dyn_regs =
4005 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4006 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4007 struct gaudi_device *gaudi = hdev->asic_specific;
4008 bool driver_performs_reset;
4009
4010 if (!hard_reset) {
4011 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4012 return 0;
4013 }
4014
4015 if (hdev->pldm) {
4016 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4017 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4018 } else {
4019 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4020 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4021 }
4022
4023 if (fw_reset) {
4024 dev_dbg(hdev->dev,
4025 "Firmware performs HARD reset, going to wait %dms\n",
4026 reset_timeout_ms);
4027
4028 goto skip_reset;
4029 }
4030
4031 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4032 !hdev->asic_prop.hard_reset_done_by_fw);
4033
4034 /* Set device to handle FLR by H/W as we will put the device CPU to
4035 * halt mode
4036 */
4037 if (driver_performs_reset)
4038 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4039 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4040
4041 /* If linux is loaded in the device CPU we need to communicate with it
4042 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4043 * registers in case of old F/Ws
4044 */
4045 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4046 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4047 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4048 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4049
4050 WREG32(irq_handler_offset,
4051 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4052
4053 /* This is a hail-mary attempt to revive the card in the small chance that the
4054 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4055 * In that case, triggering reset through GIC won't help. We need to trigger the
4056 * reset as if Linux wasn't loaded.
4057 *
4058 * We do it only if the reset cause was HB, because that would be the indication
4059 * of such an event.
4060 *
4061 * In case watchdog hasn't expired but we still got HB, then this won't do any
4062 * damage.
4063 */
4064 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4065 if (hdev->asic_prop.hard_reset_done_by_fw)
4066 hl_fw_ask_hard_reset_without_linux(hdev);
4067 else
4068 hl_fw_ask_halt_machine_without_linux(hdev);
4069 }
4070 } else {
4071 if (hdev->asic_prop.hard_reset_done_by_fw)
4072 hl_fw_ask_hard_reset_without_linux(hdev);
4073 else
4074 hl_fw_ask_halt_machine_without_linux(hdev);
4075 }
4076
4077 if (driver_performs_reset) {
4078
4079 /* Configure the reset registers. Must be done as early as
4080 * possible in case we fail during H/W initialization
4081 */
4082 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4083 (CFG_RST_H_DMA_MASK |
4084 CFG_RST_H_MME_MASK |
4085 CFG_RST_H_SM_MASK |
4086 CFG_RST_H_TPC_7_MASK));
4087
4088 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4089
4090 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4091 (CFG_RST_H_HBM_MASK |
4092 CFG_RST_H_TPC_7_MASK |
4093 CFG_RST_H_NIC_MASK |
4094 CFG_RST_H_SM_MASK |
4095 CFG_RST_H_DMA_MASK |
4096 CFG_RST_H_MME_MASK |
4097 CFG_RST_H_CPU_MASK |
4098 CFG_RST_H_MMU_MASK));
4099
4100 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4101 (CFG_RST_L_IF_MASK |
4102 CFG_RST_L_PSOC_MASK |
4103 CFG_RST_L_TPC_MASK));
4104
4105 msleep(cpu_timeout_ms);
4106
4107 /* Tell ASIC not to re-initialize PCIe */
4108 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4109
4110 /* Restart BTL/BLR upon hard-reset */
4111 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4112
4113 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4114 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4115
4116 dev_dbg(hdev->dev,
4117 "Issued HARD reset command, going to wait %dms\n",
4118 reset_timeout_ms);
4119 } else {
4120 dev_dbg(hdev->dev,
4121 "Firmware performs HARD reset, going to wait %dms\n",
4122 reset_timeout_ms);
4123 }
4124
4125 skip_reset:
4126 /*
4127 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4128 * itself is in reset. Need to wait until the reset is deasserted
4129 */
4130 msleep(reset_timeout_ms);
4131
4132 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4133 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4134 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4135 return -ETIMEDOUT;
4136 }
4137
4138 if (gaudi) {
4139 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4140 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4141 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4142 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4143 HW_CAP_HBM_SCRAMBLER);
4144
4145 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4146
4147 hdev->device_cpu_is_halted = false;
4148 }
4149 return 0;
4150 }
4151
4152 static int gaudi_suspend(struct hl_device *hdev)
4153 {
4154 int rc;
4155
4156 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4157 if (rc)
4158 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4159
4160 return rc;
4161 }
4162
4163 static int gaudi_resume(struct hl_device *hdev)
4164 {
4165 return gaudi_init_iatu(hdev);
4166 }
4167
4168 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4169 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4170 {
4171 int rc;
4172
4173 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4174 VM_DONTCOPY | VM_NORESERVE);
4175
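/*
 * dma_addr carries the device-side HOST_PHYS_BASE offset that was added
 * at allocation time; strip it before handing the address to the DMA API.
 */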
4176 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4177 (dma_addr - HOST_PHYS_BASE), size);
4178 if (rc)
4179 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4180
4181 return rc;
4182 }
4183
4184 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4185 {
4186 struct cpu_dyn_regs *dyn_regs =
4187 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4188 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4189 struct gaudi_device *gaudi = hdev->asic_specific;
4190 bool invalid_queue = false;
4191 int dma_id;
4192
4193 switch (hw_queue_id) {
4194 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4195 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4196 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4197 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4198 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4199 break;
4200
4201 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4202 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4203 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4204 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4205 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4206 break;
4207
4208 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4209 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4210 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4211 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4212 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4213 break;
4214
4215 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4216 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4217 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4218 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4219 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4220 break;
4221
4222 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4223 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4224 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4225 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4226 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4227 break;
4228
4229 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4230 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4231 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4232 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4233 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4234 break;
4235
4236 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4237 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4238 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4239 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4240 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4241 break;
4242
4243 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4244 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4245 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4246 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4247 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4248 break;
4249
4250 case GAUDI_QUEUE_ID_CPU_PQ:
4251 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4252 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4253 else
4254 invalid_queue = true;
4255 break;
4256
4257 case GAUDI_QUEUE_ID_MME_0_0:
4258 db_reg_offset = mmMME2_QM_PQ_PI_0;
4259 break;
4260
4261 case GAUDI_QUEUE_ID_MME_0_1:
4262 db_reg_offset = mmMME2_QM_PQ_PI_1;
4263 break;
4264
4265 case GAUDI_QUEUE_ID_MME_0_2:
4266 db_reg_offset = mmMME2_QM_PQ_PI_2;
4267 break;
4268
4269 case GAUDI_QUEUE_ID_MME_0_3:
4270 db_reg_offset = mmMME2_QM_PQ_PI_3;
4271 break;
4272
4273 case GAUDI_QUEUE_ID_MME_1_0:
4274 db_reg_offset = mmMME0_QM_PQ_PI_0;
4275 break;
4276
4277 case GAUDI_QUEUE_ID_MME_1_1:
4278 db_reg_offset = mmMME0_QM_PQ_PI_1;
4279 break;
4280
4281 case GAUDI_QUEUE_ID_MME_1_2:
4282 db_reg_offset = mmMME0_QM_PQ_PI_2;
4283 break;
4284
4285 case GAUDI_QUEUE_ID_MME_1_3:
4286 db_reg_offset = mmMME0_QM_PQ_PI_3;
4287 break;
4288
4289 case GAUDI_QUEUE_ID_TPC_0_0:
4290 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4291 break;
4292
4293 case GAUDI_QUEUE_ID_TPC_0_1:
4294 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4295 break;
4296
4297 case GAUDI_QUEUE_ID_TPC_0_2:
4298 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4299 break;
4300
4301 case GAUDI_QUEUE_ID_TPC_0_3:
4302 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4303 break;
4304
4305 case GAUDI_QUEUE_ID_TPC_1_0:
4306 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4307 break;
4308
4309 case GAUDI_QUEUE_ID_TPC_1_1:
4310 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4311 break;
4312
4313 case GAUDI_QUEUE_ID_TPC_1_2:
4314 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4315 break;
4316
4317 case GAUDI_QUEUE_ID_TPC_1_3:
4318 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4319 break;
4320
4321 case GAUDI_QUEUE_ID_TPC_2_0:
4322 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4323 break;
4324
4325 case GAUDI_QUEUE_ID_TPC_2_1:
4326 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4327 break;
4328
4329 case GAUDI_QUEUE_ID_TPC_2_2:
4330 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4331 break;
4332
4333 case GAUDI_QUEUE_ID_TPC_2_3:
4334 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4335 break;
4336
4337 case GAUDI_QUEUE_ID_TPC_3_0:
4338 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4339 break;
4340
4341 case GAUDI_QUEUE_ID_TPC_3_1:
4342 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4343 break;
4344
4345 case GAUDI_QUEUE_ID_TPC_3_2:
4346 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4347 break;
4348
4349 case GAUDI_QUEUE_ID_TPC_3_3:
4350 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4351 break;
4352
4353 case GAUDI_QUEUE_ID_TPC_4_0:
4354 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4355 break;
4356
4357 case GAUDI_QUEUE_ID_TPC_4_1:
4358 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4359 break;
4360
4361 case GAUDI_QUEUE_ID_TPC_4_2:
4362 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4363 break;
4364
4365 case GAUDI_QUEUE_ID_TPC_4_3:
4366 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4367 break;
4368
4369 case GAUDI_QUEUE_ID_TPC_5_0:
4370 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4371 break;
4372
4373 case GAUDI_QUEUE_ID_TPC_5_1:
4374 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4375 break;
4376
4377 case GAUDI_QUEUE_ID_TPC_5_2:
4378 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4379 break;
4380
4381 case GAUDI_QUEUE_ID_TPC_5_3:
4382 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4383 break;
4384
4385 case GAUDI_QUEUE_ID_TPC_6_0:
4386 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4387 break;
4388
4389 case GAUDI_QUEUE_ID_TPC_6_1:
4390 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4391 break;
4392
4393 case GAUDI_QUEUE_ID_TPC_6_2:
4394 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4395 break;
4396
4397 case GAUDI_QUEUE_ID_TPC_6_3:
4398 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4399 break;
4400
4401 case GAUDI_QUEUE_ID_TPC_7_0:
4402 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4403 break;
4404
4405 case GAUDI_QUEUE_ID_TPC_7_1:
4406 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4407 break;
4408
4409 case GAUDI_QUEUE_ID_TPC_7_2:
4410 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4411 break;
4412
4413 case GAUDI_QUEUE_ID_TPC_7_3:
4414 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4415 break;
4416
4417 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4418 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4419 invalid_queue = true;
4420
4421 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4422 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4423 break;
4424
4425 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4426 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4427 invalid_queue = true;
4428
4429 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4430 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4431 break;
4432
4433 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4434 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4435 invalid_queue = true;
4436
4437 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4438 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4439 break;
4440
4441 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4442 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4443 invalid_queue = true;
4444
4445 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4446 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4447 break;
4448
4449 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4450 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4451 invalid_queue = true;
4452
4453 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4454 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4455 break;
4456
4457 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4458 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4459 invalid_queue = true;
4460
4461 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4462 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4463 break;
4464
4465 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4466 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4467 invalid_queue = true;
4468
4469 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4470 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4471 break;
4472
4473 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4474 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4475 invalid_queue = true;
4476
4477 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4478 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4479 break;
4480
4481 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4482 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4483 invalid_queue = true;
4484
4485 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4486 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4487 break;
4488
4489 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4490 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4491 invalid_queue = true;
4492
4493 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4494 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4495 break;
4496
4497 default:
4498 invalid_queue = true;
4499 }
4500
4501 if (invalid_queue) {
4502 /* Should never get here */
4503 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4504 hw_queue_id);
4505 return;
4506 }
4507
4508 db_value = pi;
4509
4510 /* ring the doorbell */
4511 WREG32(db_reg_offset, db_value);
4512
4513 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4514 /* make sure device CPU will read latest data from host */
4515 mb();
4516
4517 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4518 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4519 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4520
4521 WREG32(irq_handler_offset,
4522 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4523 }
4524 }
4525
4526 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4527 struct hl_bd *bd)
4528 {
4529 __le64 *pbd = (__le64 *) bd;
4530
4531 /* The QMANs are on the host memory so a simple copy suffices */
4532 pqe[0] = pbd[0];
4533 pqe[1] = pbd[1];
4534 }
4535
4536 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4537 dma_addr_t *dma_handle, gfp_t flags)
4538 {
4539 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4540 dma_handle, flags);
4541
4542 /* Shift to the device's base physical address of host memory */
4543 if (kernel_addr)
4544 *dma_handle += HOST_PHYS_BASE;
4545
4546 return kernel_addr;
4547 }
4548
4549 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4550 void *cpu_addr, dma_addr_t dma_handle)
4551 {
4552 /* Cancel the device's base physical address of host memory */
4553 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4554
4555 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4556 }
4557
4558 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4559 {
4560 struct asic_fixed_properties *prop = &hdev->asic_prop;
4561 u64 cur_addr = prop->dram_user_base_address;
4562 u32 chunk_size, busy;
4563 int rc, dma_id;
4564
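/*
 * Scrub the DRAM in chunks of up to 2GB, dispatching one memset per DMA
 * channel in parallel and then waiting for all channels to go idle before
 * issuing the next batch.
 */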
4565 while (cur_addr < prop->dram_end_address) {
4566 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4567 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4568
4569 chunk_size =
4570 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4571
4572 dev_dbg(hdev->dev,
4573 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4574 cur_addr, cur_addr + chunk_size);
4575
4576 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4577 lower_32_bits(val));
4578 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4579 upper_32_bits(val));
4580 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4581 lower_32_bits(cur_addr));
4582 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4583 upper_32_bits(cur_addr));
4584 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4585 chunk_size);
4586 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4587 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4588 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4589
4590 cur_addr += chunk_size;
4591
4592 if (cur_addr == prop->dram_end_address)
4593 break;
4594 }
4595
4596 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4597 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4598
4599 rc = hl_poll_timeout(
4600 hdev,
4601 mmDMA0_CORE_STS0 + dma_offset,
4602 busy,
4603 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4604 1000,
4605 HBM_SCRUBBING_TIMEOUT_US);
4606
4607 if (rc) {
4608 dev_err(hdev->dev,
4609 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4610 dma_id);
4611 return -EIO;
4612 }
4613 }
4614 }
4615
4616 return 0;
4617 }
4618
4619 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4620 {
4621 struct asic_fixed_properties *prop = &hdev->asic_prop;
4622 u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4623 min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4624 u64 addr, size, val = hdev->memory_scrub_val;
4625 ktime_t timeout;
4626 int rc = 0;
4627
4628 if (!hdev->memory_scrub)
4629 return 0;
4630
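/* Wait for the device to become idle before scrubbing its memory */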
4631 timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4632 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4633 if (ktime_compare(ktime_get(), timeout) > 0) {
4634 dev_err(hdev->dev, "waiting for idle timeout\n");
4635 return -ETIMEDOUT;
4636 }
4637 usleep_range((1000 >> 2) + 1, 1000);
4638 }
4639
4640 /* Scrub SRAM */
4641 addr = prop->sram_user_base_address;
4642 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4643
4644 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4645 addr, addr + size, val);
4646 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4647 if (rc) {
4648 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4649 return rc;
4650 }
4651
4652 /* Scrub HBM using all DMA channels in parallel */
4653 rc = gaudi_scrub_device_dram(hdev, val);
4654 if (rc) {
4655 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4656 return rc;
4657 }
4658
4659 return 0;
4660 }
4661
4662 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4663 u32 queue_id, dma_addr_t *dma_handle,
4664 u16 *queue_len)
4665 {
4666 struct gaudi_device *gaudi = hdev->asic_specific;
4667 struct gaudi_internal_qman_info *q;
4668
4669 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4670 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4671 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4672 return NULL;
4673 }
4674
4675 q = &gaudi->internal_qmans[queue_id];
4676 *dma_handle = q->pq_dma_addr;
4677 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4678
4679 return q->pq_kernel_addr;
4680 }
4681
4682 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4683 u16 len, u32 timeout, u64 *result)
4684 {
4685 struct gaudi_device *gaudi = hdev->asic_specific;
4686
4687 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4688 if (result)
4689 *result = 0;
4690 return 0;
4691 }
4692
4693 if (!timeout)
4694 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4695
4696 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4697 timeout, result);
4698 }
4699
4700 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4701 {
4702 struct packet_msg_prot *fence_pkt;
4703 dma_addr_t pkt_dma_addr;
4704 u32 fence_val, tmp, timeout_usec;
4705 dma_addr_t fence_dma_addr;
4706 u32 *fence_ptr;
4707 int rc;
4708
4709 if (hdev->pldm)
4710 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4711 else
4712 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4713
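/*
 * The test sends a MSG_PROT packet that writes a known fence value to a
 * host memory location and then polls that location until the value
 * shows up or the timeout expires.
 */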
4714 fence_val = GAUDI_QMAN0_FENCE_VAL;
4715
4716 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4717 if (!fence_ptr) {
4718 dev_err(hdev->dev,
4719 "Failed to allocate memory for H/W queue %d testing\n",
4720 hw_queue_id);
4721 return -ENOMEM;
4722 }
4723
4724 *fence_ptr = 0;
4725
4726 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4727 &pkt_dma_addr);
4728 if (!fence_pkt) {
4729 dev_err(hdev->dev,
4730 "Failed to allocate packet for H/W queue %d testing\n",
4731 hw_queue_id);
4732 rc = -ENOMEM;
4733 goto free_fence_ptr;
4734 }
4735
4736 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4737 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4738 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4739
4740 fence_pkt->ctl = cpu_to_le32(tmp);
4741 fence_pkt->value = cpu_to_le32(fence_val);
4742 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4743
4744 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4745 sizeof(struct packet_msg_prot),
4746 pkt_dma_addr);
4747 if (rc) {
4748 dev_err(hdev->dev,
4749 "Failed to send fence packet to H/W queue %d\n",
4750 hw_queue_id);
4751 goto free_pkt;
4752 }
4753
4754 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4755 1000, timeout_usec, true);
4756
4757 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4758
4759 if (rc == -ETIMEDOUT) {
4760 dev_err(hdev->dev,
4761 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4762 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4763 rc = -EIO;
4764 }
4765
4766 free_pkt:
4767 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4768 free_fence_ptr:
4769 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4770 return rc;
4771 }
4772
4773 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4774 {
4775 struct gaudi_device *gaudi = hdev->asic_specific;
4776
4777 /*
4778 * check capability here as send_cpu_message() won't update the result
4779 * value if no capability
4780 */
4781 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4782 return 0;
4783
4784 return hl_fw_test_cpu_queue(hdev);
4785 }
4786
4787 static int gaudi_test_queues(struct hl_device *hdev)
4788 {
4789 int i, rc, ret_val = 0;
4790
4791 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4792 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4793 rc = gaudi_test_queue(hdev, i);
4794 if (rc)
4795 ret_val = -EINVAL;
4796 }
4797 }
4798
4799 rc = gaudi_test_cpu_queue(hdev);
4800 if (rc)
4801 ret_val = -EINVAL;
4802
4803 return ret_val;
4804 }
4805
4806 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4807 gfp_t mem_flags, dma_addr_t *dma_handle)
4808 {
4809 void *kernel_addr;
4810
4811 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4812 return NULL;
4813
4814 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4815
4816 /* Shift to the device's base physical address of host memory */
4817 if (kernel_addr)
4818 *dma_handle += HOST_PHYS_BASE;
4819
4820 return kernel_addr;
4821 }
4822
4823 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4824 dma_addr_t dma_addr)
4825 {
4826 /* Cancel the device's base physical address of host memory */
4827 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4828
4829 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4830 }
4831
4832 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4833 size_t size, dma_addr_t *dma_handle)
4834 {
4835 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4836 }
4837
4838 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4839 size_t size, void *vaddr)
4840 {
4841 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4842 }
4843
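/*
 * Compute how many bytes of LIN_DMA packets are needed to cover the given
 * DMA-mapped SG table. Adjacent entries whose DMA addresses are contiguous
 * are merged as long as the combined length does not exceed
 * DMA_MAX_TRANSFER_SIZE, so each merged chunk costs one packet_lin_dma.
 */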
4844 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4845 {
4846 struct scatterlist *sg, *sg_next_iter;
4847 u32 count, dma_desc_cnt;
4848 u64 len, len_next;
4849 dma_addr_t addr, addr_next;
4850
4851 dma_desc_cnt = 0;
4852
4853 for_each_sgtable_dma_sg(sgt, sg, count) {
4854 len = sg_dma_len(sg);
4855 addr = sg_dma_address(sg);
4856
4857 if (len == 0)
4858 break;
4859
4860 while ((count + 1) < sgt->nents) {
4861 sg_next_iter = sg_next(sg);
4862 len_next = sg_dma_len(sg_next_iter);
4863 addr_next = sg_dma_address(sg_next_iter);
4864
4865 if (len_next == 0)
4866 break;
4867
4868 if ((addr + len == addr_next) &&
4869 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4870 len += len_next;
4871 count++;
4872 sg = sg_next_iter;
4873 } else {
4874 break;
4875 }
4876 }
4877
4878 dma_desc_cnt++;
4879 }
4880
4881 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4882 }
4883
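/*
 * Pin the host buffer referenced by a user LIN_DMA packet (unless it is
 * already pinned for this job), DMA-map its SG table in the given direction,
 * add it to the job's userptr list and account for the LIN_DMA packets that
 * will be generated for it in the patched CB size.
 */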
4884 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4885 struct hl_cs_parser *parser,
4886 struct packet_lin_dma *user_dma_pkt,
4887 u64 addr, enum dma_data_direction dir)
4888 {
4889 struct hl_userptr *userptr;
4890 int rc;
4891
4892 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4893 parser->job_userptr_list, &userptr))
4894 goto already_pinned;
4895
4896 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4897 if (!userptr)
4898 return -ENOMEM;
4899
4900 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4901 userptr);
4902 if (rc)
4903 goto free_userptr;
4904
4905 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4906
4907 rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4908 if (rc) {
4909 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4910 goto unpin_memory;
4911 }
4912
4913 userptr->dma_mapped = true;
4914 userptr->dir = dir;
4915
4916 already_pinned:
4917 parser->patched_cb_size +=
4918 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4919
4920 return 0;
4921
4922 unpin_memory:
4923 list_del(&userptr->job_node);
4924 hl_unpin_host_memory(hdev, userptr);
4925 free_userptr:
4926 kfree(userptr);
4927 return rc;
4928 }
4929
4930 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4931 struct hl_cs_parser *parser,
4932 struct packet_lin_dma *user_dma_pkt,
4933 bool src_in_host)
4934 {
4935 enum dma_data_direction dir;
4936 bool skip_host_mem_pin = false, user_memset;
4937 u64 addr;
4938 int rc = 0;
4939
4940 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4941 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4942 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4943
4944 if (src_in_host) {
4945 if (user_memset)
4946 skip_host_mem_pin = true;
4947
4948 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4949 dir = DMA_TO_DEVICE;
4950 addr = le64_to_cpu(user_dma_pkt->src_addr);
4951 } else {
4952 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4953 dir = DMA_FROM_DEVICE;
4954 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4955 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4956 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4957 }
4958
4959 if (skip_host_mem_pin)
4960 parser->patched_cb_size += sizeof(*user_dma_pkt);
4961 else
4962 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4963 addr, dir);
4964
4965 return rc;
4966 }
4967
4968 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4969 struct hl_cs_parser *parser,
4970 struct packet_lin_dma *user_dma_pkt)
4971 {
4972 bool src_in_host = false;
4973 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4974 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4975 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4976
4977 dev_dbg(hdev->dev, "DMA packet details:\n");
4978 dev_dbg(hdev->dev, "source == 0x%llx\n",
4979 le64_to_cpu(user_dma_pkt->src_addr));
4980 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4981 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4982
4983 /*
4984 * Special handling for DMA with size 0. Bypass all validations
4985 * because no transactions will be done except for WR_COMP, which
4986 * is not a security issue
4987 */
4988 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4989 parser->patched_cb_size += sizeof(*user_dma_pkt);
4990 return 0;
4991 }
4992
4993 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4994 src_in_host = true;
4995
4996 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4997 src_in_host);
4998 }
4999
5000 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5001 struct hl_cs_parser *parser,
5002 struct packet_load_and_exe *user_pkt)
5003 {
5004 u32 cfg;
5005
5006 cfg = le32_to_cpu(user_pkt->cfg);
5007
5008 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5009 dev_err(hdev->dev,
5010 "User not allowed to use Load and Execute\n");
5011 return -EPERM;
5012 }
5013
5014 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5015
5016 return 0;
5017 }
5018
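/*
 * Walk the user CB packet by packet, rejecting packets the user is not
 * allowed to submit (MSG_PROT, CP_DMA, STOP, WREG_BULK), validating
 * LOAD_AND_EXE and LIN_DMA packets, and accumulating the size of the
 * patched CB that will be built from it.
 */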
5019 static int gaudi_validate_cb(struct hl_device *hdev,
5020 struct hl_cs_parser *parser, bool is_mmu)
5021 {
5022 u32 cb_parsed_length = 0;
5023 int rc = 0;
5024
5025 parser->patched_cb_size = 0;
5026
5027 /* user_cb_size is more than 0 so the loop will always be executed */
5028 while (cb_parsed_length < parser->user_cb_size) {
5029 enum packet_id pkt_id;
5030 u16 pkt_size;
5031 struct gaudi_packet *user_pkt;
5032
5033 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5034
5035 pkt_id = (enum packet_id) (
5036 (le64_to_cpu(user_pkt->header) &
5037 PACKET_HEADER_PACKET_ID_MASK) >>
5038 PACKET_HEADER_PACKET_ID_SHIFT);
5039
5040 if (!validate_packet_id(pkt_id)) {
5041 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5042 rc = -EINVAL;
5043 break;
5044 }
5045
5046 pkt_size = gaudi_packet_sizes[pkt_id];
5047 cb_parsed_length += pkt_size;
5048 if (cb_parsed_length > parser->user_cb_size) {
5049 dev_err(hdev->dev,
5050 "packet 0x%x is out of CB boundary\n", pkt_id);
5051 rc = -EINVAL;
5052 break;
5053 }
5054
5055 switch (pkt_id) {
5056 case PACKET_MSG_PROT:
5057 dev_err(hdev->dev,
5058 "User not allowed to use MSG_PROT\n");
5059 rc = -EPERM;
5060 break;
5061
5062 case PACKET_CP_DMA:
5063 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5064 rc = -EPERM;
5065 break;
5066
5067 case PACKET_STOP:
5068 dev_err(hdev->dev, "User not allowed to use STOP\n");
5069 rc = -EPERM;
5070 break;
5071
5072 case PACKET_WREG_BULK:
5073 dev_err(hdev->dev,
5074 "User not allowed to use WREG_BULK\n");
5075 rc = -EPERM;
5076 break;
5077
5078 case PACKET_LOAD_AND_EXE:
5079 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5080 (struct packet_load_and_exe *) user_pkt);
5081 break;
5082
5083 case PACKET_LIN_DMA:
5084 parser->contains_dma_pkt = true;
5085 if (is_mmu)
5086 parser->patched_cb_size += pkt_size;
5087 else
5088 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5089 (struct packet_lin_dma *) user_pkt);
5090 break;
5091
5092 case PACKET_WREG_32:
5093 case PACKET_MSG_LONG:
5094 case PACKET_MSG_SHORT:
5095 case PACKET_REPEAT:
5096 case PACKET_FENCE:
5097 case PACKET_NOP:
5098 case PACKET_ARB_POINT:
5099 parser->patched_cb_size += pkt_size;
5100 break;
5101
5102 default:
5103 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5104 pkt_id);
5105 rc = -EINVAL;
5106 break;
5107 }
5108
5109 if (rc)
5110 break;
5111 }
5112
5113 /*
5114 * The new CB should have space at the end for:
5115 * 1. Optional NOP padding for cacheline alignment
5116 * 2. A MSG_PROT packet that will act as a completion packet
5117 * 3. A MSG_PROT packet that will generate the MSI interrupt
5118 */
5119 if (parser->completion)
5120 parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5121 parser->patched_cb_size);
5122
5123 return rc;
5124 }
5125
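/*
 * Expand a single user LIN_DMA packet into one LIN_DMA packet per merged SG
 * entry of the pinned host buffer. The engine barrier is kept only on the
 * first generated packet, and the user's WR_COMP setting is restored only on
 * the last one, so completion is signaled once for the whole transfer.
 */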
5126 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5127 struct hl_cs_parser *parser,
5128 struct packet_lin_dma *user_dma_pkt,
5129 struct packet_lin_dma *new_dma_pkt,
5130 u32 *new_dma_pkt_size)
5131 {
5132 struct hl_userptr *userptr;
5133 struct scatterlist *sg, *sg_next_iter;
5134 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5135 u64 len, len_next;
5136 dma_addr_t dma_addr, dma_addr_next;
5137 u64 device_memory_addr, addr;
5138 enum dma_data_direction dir;
5139 struct sg_table *sgt;
5140 bool src_in_host = false;
5141 bool skip_host_mem_pin = false;
5142 bool user_memset;
5143
5144 ctl = le32_to_cpu(user_dma_pkt->ctl);
5145
5146 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5147 src_in_host = true;
5148
5149 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5150 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5151
5152 if (src_in_host) {
5153 addr = le64_to_cpu(user_dma_pkt->src_addr);
5154 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5155 dir = DMA_TO_DEVICE;
5156 if (user_memset)
5157 skip_host_mem_pin = true;
5158 } else {
5159 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5160 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5161 dir = DMA_FROM_DEVICE;
5162 }
5163
5164 if ((!skip_host_mem_pin) &&
5165 (!hl_userptr_is_pinned(hdev, addr,
5166 le32_to_cpu(user_dma_pkt->tsize),
5167 parser->job_userptr_list, &userptr))) {
5168 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5169 addr, le32_to_cpu(user_dma_pkt->tsize));
5170 return -EFAULT;
5171 }
5172
5173 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5174 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5175 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5176 return 0;
5177 }
5178
5179 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5180
5181 sgt = userptr->sgt;
5182 dma_desc_cnt = 0;
5183
5184 for_each_sgtable_dma_sg(sgt, sg, count) {
5185 len = sg_dma_len(sg);
5186 dma_addr = sg_dma_address(sg);
5187
5188 if (len == 0)
5189 break;
5190
5191 while ((count + 1) < sgt->nents) {
5192 sg_next_iter = sg_next(sg);
5193 len_next = sg_dma_len(sg_next_iter);
5194 dma_addr_next = sg_dma_address(sg_next_iter);
5195
5196 if (len_next == 0)
5197 break;
5198
5199 if ((dma_addr + len == dma_addr_next) &&
5200 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5201 len += len_next;
5202 count++;
5203 sg = sg_next_iter;
5204 } else {
5205 break;
5206 }
5207 }
5208
5209 ctl = le32_to_cpu(user_dma_pkt->ctl);
5210 if (likely(dma_desc_cnt))
5211 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5212 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5213 new_dma_pkt->ctl = cpu_to_le32(ctl);
5214 new_dma_pkt->tsize = cpu_to_le32(len);
5215
5216 if (dir == DMA_TO_DEVICE) {
5217 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5218 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5219 } else {
5220 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5221 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5222 }
5223
5224 if (!user_memset)
5225 device_memory_addr += len;
5226 dma_desc_cnt++;
5227 new_dma_pkt++;
5228 }
5229
5230 if (!dma_desc_cnt) {
5231 dev_err(hdev->dev,
5232 "Error of 0 SG entries when patching DMA packet\n");
5233 return -EFAULT;
5234 }
5235
5236 /* Fix the last dma packet - wrcomp must be as user set it */
5237 new_dma_pkt--;
5238 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5239
5240 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5241
5242 return 0;
5243 }
5244
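/*
 * Build the patched CB from the user CB: privileged packets are rejected,
 * LIN_DMA packets are expanded via gaudi_patch_dma_packet() and all other
 * allowed packets are copied as-is.
 */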
5245 static int gaudi_patch_cb(struct hl_device *hdev,
5246 struct hl_cs_parser *parser)
5247 {
5248 u32 cb_parsed_length = 0;
5249 u32 cb_patched_cur_length = 0;
5250 int rc = 0;
5251
5252 /* user_cb_size is more than 0 so the loop will always be executed */
5253 while (cb_parsed_length < parser->user_cb_size) {
5254 enum packet_id pkt_id;
5255 u16 pkt_size;
5256 u32 new_pkt_size = 0;
5257 struct gaudi_packet *user_pkt, *kernel_pkt;
5258
5259 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5260 kernel_pkt = parser->patched_cb->kernel_address +
5261 cb_patched_cur_length;
5262
5263 pkt_id = (enum packet_id) (
5264 (le64_to_cpu(user_pkt->header) &
5265 PACKET_HEADER_PACKET_ID_MASK) >>
5266 PACKET_HEADER_PACKET_ID_SHIFT);
5267
5268 if (!validate_packet_id(pkt_id)) {
5269 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5270 rc = -EINVAL;
5271 break;
5272 }
5273
5274 pkt_size = gaudi_packet_sizes[pkt_id];
5275 cb_parsed_length += pkt_size;
5276 if (cb_parsed_length > parser->user_cb_size) {
5277 dev_err(hdev->dev,
5278 "packet 0x%x is out of CB boundary\n", pkt_id);
5279 rc = -EINVAL;
5280 break;
5281 }
5282
5283 switch (pkt_id) {
5284 case PACKET_LIN_DMA:
5285 rc = gaudi_patch_dma_packet(hdev, parser,
5286 (struct packet_lin_dma *) user_pkt,
5287 (struct packet_lin_dma *) kernel_pkt,
5288 &new_pkt_size);
5289 cb_patched_cur_length += new_pkt_size;
5290 break;
5291
5292 case PACKET_MSG_PROT:
5293 dev_err(hdev->dev,
5294 "User not allowed to use MSG_PROT\n");
5295 rc = -EPERM;
5296 break;
5297
5298 case PACKET_CP_DMA:
5299 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5300 rc = -EPERM;
5301 break;
5302
5303 case PACKET_STOP:
5304 dev_err(hdev->dev, "User not allowed to use STOP\n");
5305 rc = -EPERM;
5306 break;
5307
5308 case PACKET_WREG_32:
5309 case PACKET_WREG_BULK:
5310 case PACKET_MSG_LONG:
5311 case PACKET_MSG_SHORT:
5312 case PACKET_REPEAT:
5313 case PACKET_FENCE:
5314 case PACKET_NOP:
5315 case PACKET_ARB_POINT:
5316 case PACKET_LOAD_AND_EXE:
5317 memcpy(kernel_pkt, user_pkt, pkt_size);
5318 cb_patched_cur_length += pkt_size;
5319 break;
5320
5321 default:
5322 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5323 pkt_id);
5324 rc = -EINVAL;
5325 break;
5326 }
5327
5328 if (rc)
5329 break;
5330 }
5331
5332 return rc;
5333 }
5334
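/*
 * CS parsing when the MMU is enabled: allocate a kernel-owned patched CB,
 * copy the user CB into it and validate the copy. No packet patching is
 * needed because host addresses are translated by the MMU.
 */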
5335 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5336 struct hl_cs_parser *parser)
5337 {
5338 u64 handle;
5339 u32 patched_cb_size;
5340 struct hl_cb *user_cb;
5341 int rc;
5342
5343 /*
5344 * The new CB should have space at the end for:
5345 * 1. Optional NOP padding for cacheline alignment
5346 * 2. A MSG_PROT packet that will act as a completion packet
5347 * 3. A MSG_PROT packet that will generate the MSI interrupt
5348 */
5349 if (parser->completion)
5350 parser->patched_cb_size = parser->user_cb_size +
5351 gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5352 else
5353 parser->patched_cb_size = parser->user_cb_size;
5354
5355 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5356 parser->patched_cb_size, false, false,
5357 &handle);
5358
5359 if (rc) {
5360 dev_err(hdev->dev,
5361 "Failed to allocate patched CB for DMA CS %d\n",
5362 rc);
5363 return rc;
5364 }
5365
5366 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5367 /* hl_cb_get should never fail */
5368 if (!parser->patched_cb) {
5369 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5370 rc = -EFAULT;
5371 goto out;
5372 }
5373
5374 /*
5375 * We are protected from overflow because the check
5376 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5377 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5378 *
5379 * There is no option to reach here without going through that check because:
5380 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5381 * an external queue.
5382 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5383 */
5384 memcpy(parser->patched_cb->kernel_address,
5385 parser->user_cb->kernel_address,
5386 parser->user_cb_size);
5387
5388 patched_cb_size = parser->patched_cb_size;
5389
5390 /* Validate patched CB instead of user CB */
5391 user_cb = parser->user_cb;
5392 parser->user_cb = parser->patched_cb;
5393 rc = gaudi_validate_cb(hdev, parser, true);
5394 parser->user_cb = user_cb;
5395
5396 if (rc) {
5397 hl_cb_put(parser->patched_cb);
5398 goto out;
5399 }
5400
5401 if (patched_cb_size != parser->patched_cb_size) {
5402 dev_err(hdev->dev, "user CB size mismatch\n");
5403 hl_cb_put(parser->patched_cb);
5404 rc = -EINVAL;
5405 goto out;
5406 }
5407
5408 out:
5409 /*
5410 * Always call cb destroy here because we still hold one reference
5411 * to the CB from the earlier cb_get. After the job completes,
5412 * cb_put will release it, but here we want to remove it from
5413 * the idr.
5414 */
5415 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5416
5417 return rc;
5418 }
5419
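/*
 * CS parsing when the MMU is disabled: validate the user CB to compute the
 * patched CB size, then allocate the patched CB and patch the packets into
 * it, replacing host virtual addresses with DMA addresses of pinned pages.
 */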
5420 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5421 struct hl_cs_parser *parser)
5422 {
5423 u64 handle;
5424 int rc;
5425
5426 rc = gaudi_validate_cb(hdev, parser, false);
5427
5428 if (rc)
5429 goto free_userptr;
5430
5431 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5432 parser->patched_cb_size, false, false,
5433 &handle);
5434 if (rc) {
5435 dev_err(hdev->dev,
5436 "Failed to allocate patched CB for DMA CS %d\n", rc);
5437 goto free_userptr;
5438 }
5439
5440 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5441 /* hl_cb_get should never fail here */
5442 if (!parser->patched_cb) {
5443 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5444 rc = -EFAULT;
5445 goto out;
5446 }
5447
5448 rc = gaudi_patch_cb(hdev, parser);
5449
5450 if (rc)
5451 hl_cb_put(parser->patched_cb);
5452
5453 out:
5454 /*
5455 * Always call cb destroy here because we still hold one reference
5456 * to the CB from the earlier cb_get. After the job completes,
5457 * cb_put will release it, but here we want to remove it from
5458 * the idr.
5459 */
5460 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5461
5462 free_userptr:
5463 if (rc)
5464 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5465 return rc;
5466 }
5467
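/*
 * For jobs submitted to internal queues only verify that the target NIC
 * queue is enabled and that the CB address range falls entirely inside
 * SRAM, DRAM or the PMMU virtual address range.
 */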
5468 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5469 struct hl_cs_parser *parser)
5470 {
5471 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5472 struct gaudi_device *gaudi = hdev->asic_specific;
5473 u32 nic_queue_offset, nic_mask_q_id;
5474
5475 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5476 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5477 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5478 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5479
5480 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5481 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5482 return -EINVAL;
5483 }
5484 }
5485
5486 /* For internal queue jobs just check if CB address is valid */
5487 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5488 parser->user_cb_size,
5489 asic_prop->sram_user_base_address,
5490 asic_prop->sram_end_address))
5491 return 0;
5492
5493 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5494 parser->user_cb_size,
5495 asic_prop->dram_user_base_address,
5496 asic_prop->dram_end_address))
5497 return 0;
5498
5499 /* PMMU and HPMMU addresses are equal, check only one of them */
5500 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5501 parser->user_cb_size,
5502 asic_prop->pmmu.start_addr,
5503 asic_prop->pmmu.end_addr))
5504 return 0;
5505
5506 dev_err(hdev->dev,
5507 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5508 parser->user_cb, parser->user_cb_size);
5509
5510 return -EFAULT;
5511 }
5512
5513 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5514 {
5515 struct gaudi_device *gaudi = hdev->asic_specific;
5516
5517 if (parser->queue_type == QUEUE_TYPE_INT)
5518 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5519
5520 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5521 return gaudi_parse_cb_mmu(hdev, parser);
5522 else
5523 return gaudi_parse_cb_no_mmu(hdev, parser);
5524 }
5525
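/*
 * Append the end-of-CB packets: NOP padding from the original CB end up to
 * the last two MSG_PROT packets, the first of which writes the completion
 * value to the CQ and the second generates the MSI interrupt.
 */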
5526 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5527 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5528 u32 msi_vec, bool eb)
5529 {
5530 struct packet_msg_prot *cq_pkt;
5531 struct packet_nop *cq_padding;
5532 u64 msi_addr;
5533 u32 tmp;
5534
5535 cq_padding = kernel_address + original_len;
5536 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5537
5538 while ((void *)cq_padding < (void *)cq_pkt) {
5539 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5540 cq_padding++;
5541 }
5542
5543 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5544 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5545
5546 if (eb)
5547 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5548
5549 cq_pkt->ctl = cpu_to_le32(tmp);
5550 cq_pkt->value = cpu_to_le32(cq_val);
5551 cq_pkt->addr = cpu_to_le64(cq_addr);
5552
5553 cq_pkt++;
5554
5555 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5556 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5557 cq_pkt->ctl = cpu_to_le32(tmp);
5558 cq_pkt->value = cpu_to_le32(1);
5559 msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5560 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5561 }
5562
5563 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5564 {
5565 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5566 }
5567
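/*
 * Memset a device memory range by building a single memset LIN_DMA packet
 * in a kernel CB and sending it as a job on QMAN0 (PCI DMA channel 0),
 * checking the DMA engine error cause before and after the transfer.
 */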
5568 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5569 u32 size, u64 val)
5570 {
5571 struct packet_lin_dma *lin_dma_pkt;
5572 struct hl_cs_job *job;
5573 u32 cb_size, ctl, err_cause;
5574 struct hl_cb *cb;
5575 int rc;
5576
5577 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5578 if (!cb)
5579 return -EFAULT;
5580
5581 lin_dma_pkt = cb->kernel_address;
5582 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5583 cb_size = sizeof(*lin_dma_pkt);
5584
5585 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5586 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5587 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5588 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5589 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5590
5591 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5592 lin_dma_pkt->src_addr = cpu_to_le64(val);
5593 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5594 lin_dma_pkt->tsize = cpu_to_le32(size);
5595
5596 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5597 if (!job) {
5598 dev_err(hdev->dev, "Failed to allocate a new job\n");
5599 rc = -ENOMEM;
5600 goto release_cb;
5601 }
5602
5603 /* Verify DMA is OK */
5604 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5605 if (err_cause && !hdev->init_done) {
5606 dev_dbg(hdev->dev,
5607 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5608 err_cause);
5609 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5610 }
5611
5612 job->id = 0;
5613 job->user_cb = cb;
5614 atomic_inc(&job->user_cb->cs_cnt);
5615 job->user_cb_size = cb_size;
5616 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5617 job->patched_cb = job->user_cb;
5618 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5619
5620 hl_debugfs_add_job(hdev, job);
5621
5622 rc = gaudi_send_job_on_qman0(hdev, job);
5623 hl_debugfs_remove_job(hdev, job);
5624 kfree(job);
5625 atomic_dec(&cb->cs_cnt);
5626
5627 /* Verify DMA is OK */
5628 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5629 if (err_cause) {
5630 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5631 rc = -EIO;
5632 if (!hdev->init_done) {
5633 dev_dbg(hdev->dev,
5634 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5635 err_cause);
5636 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5637 }
5638 }
5639
5640 release_cb:
5641 hl_cb_put(cb);
5642 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5643
5644 return rc;
5645 }
5646
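/*
 * Write the same value to num_regs consecutive 32-bit registers starting at
 * reg_base by building a CB of MSG_LONG packets (one per register) and
 * sending it as a job on QMAN0.
 */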
5647 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5648 u32 num_regs, u32 val)
5649 {
5650 struct packet_msg_long *pkt;
5651 struct hl_cs_job *job;
5652 u32 cb_size, ctl;
5653 struct hl_cb *cb;
5654 int i, rc;
5655
5656 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5657
5658 if (cb_size > SZ_2M) {
5659 dev_err(hdev->dev, "CB size must be smaller than %u bytes\n", SZ_2M);
5660 return -ENOMEM;
5661 }
5662
5663 cb = hl_cb_kernel_create(hdev, cb_size, false);
5664 if (!cb)
5665 return -EFAULT;
5666
5667 pkt = cb->kernel_address;
5668
5669 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5670 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5671 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5672 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5673 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5674
5675 for (i = 0; i < num_regs ; i++, pkt++) {
5676 pkt->ctl = cpu_to_le32(ctl);
5677 pkt->value = cpu_to_le32(val);
5678 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5679 }
5680
5681 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5682 if (!job) {
5683 dev_err(hdev->dev, "Failed to allocate a new job\n");
5684 rc = -ENOMEM;
5685 goto release_cb;
5686 }
5687
5688 job->id = 0;
5689 job->user_cb = cb;
5690 atomic_inc(&job->user_cb->cs_cnt);
5691 job->user_cb_size = cb_size;
5692 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5693 job->patched_cb = job->user_cb;
5694 job->job_cb_size = cb_size;
5695
5696 hl_debugfs_add_job(hdev, job);
5697
5698 rc = gaudi_send_job_on_qman0(hdev, job);
5699 hl_debugfs_remove_job(hdev, job);
5700 kfree(job);
5701 atomic_dec(&cb->cs_cnt);
5702
5703 release_cb:
5704 hl_cb_put(cb);
5705 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5706
5707 return rc;
5708 }
5709
5710 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5711 {
5712 u64 base_addr;
5713 u32 num_regs;
5714 int rc;
5715
5716 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5717 num_regs = NUM_OF_SOB_IN_BLOCK;
5718 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5719 if (rc) {
5720 dev_err(hdev->dev, "failed resetting SM registers");
5721 return -ENOMEM;
5722 }
5723
5724 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5725 num_regs = NUM_OF_SOB_IN_BLOCK;
5726 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5727 if (rc) {
5728 dev_err(hdev->dev, "failed resetting SM registers");
5729 return -ENOMEM;
5730 }
5731
5732 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5733 num_regs = NUM_OF_SOB_IN_BLOCK;
5734 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5735 if (rc) {
5736 dev_err(hdev->dev, "failed resetting SM registers");
5737 return -ENOMEM;
5738 }
5739
5740 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5741 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5742 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5743 if (rc) {
5744 dev_err(hdev->dev, "failed resetting SM registers");
5745 return -ENOMEM;
5746 }
5747
5748 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5749 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5750 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5751 if (rc) {
5752 dev_err(hdev->dev, "failed resetting SM registers");
5753 return -ENOMEM;
5754 }
5755
5756 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5757 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5758 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5759 if (rc) {
5760 dev_err(hdev->dev, "failed resetting SM registers");
5761 return -ENOMEM;
5762 }
5763
5764 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5765 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5766 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5767 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5768 if (rc) {
5769 dev_err(hdev->dev, "failed resetting SM registers");
5770 return -ENOMEM;
5771 }
5772
5773 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5774 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5775 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5776 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5777 if (rc) {
5778 dev_err(hdev->dev, "failed resetting SM registers");
5779 return -ENOMEM;
5780 }
5781
5782 return 0;
5783 }
5784
5785 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5786 {
5787 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5788 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5789 int i;
5790
5791 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5792 u64 sob_addr = CFG_BASE +
5793 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5794 (i * sob_delta);
5795 u32 dma_offset = i * DMA_CORE_OFFSET;
5796
5797 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5798 lower_32_bits(sob_addr));
5799 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5800 upper_32_bits(sob_addr));
5801 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5802
5803 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5804 * modified by the user for SRAM reduction
5805 */
5806 if (i > 1)
5807 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5808 0x00000001);
5809 }
5810 }
5811
5812 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5813 {
5814 u32 qman_offset;
5815 int i;
5816
5817 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5818 qman_offset = i * DMA_QMAN_OFFSET;
5819 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5820 }
5821
5822 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5823 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5824 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5825 }
5826
5827 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5828 qman_offset = i * TPC_QMAN_OFFSET;
5829 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5830 }
5831
5832 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5833 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5834 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5835 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5836 }
5837 }
5838
5839 static int gaudi_restore_user_registers(struct hl_device *hdev)
5840 {
5841 int rc;
5842
5843 rc = gaudi_restore_sm_registers(hdev);
5844 if (rc)
5845 return rc;
5846
5847 gaudi_restore_dma_registers(hdev);
5848 gaudi_restore_qm_registers(hdev);
5849
5850 return 0;
5851 }
5852
5853 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5854 {
5855 return 0;
5856 }
5857
5858 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5859 {
5860 u32 size = hdev->asic_prop.mmu_pgt_size +
5861 hdev->asic_prop.mmu_cache_mng_size;
5862 struct gaudi_device *gaudi = hdev->asic_specific;
5863 u64 addr = hdev->asic_prop.mmu_pgt_addr;
5864
5865 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5866 return 0;
5867
5868 return gaudi_memset_device_memory(hdev, addr, size, 0);
5869 }
5870
5871 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5872 {
5873
5874 }
5875
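/*
 * Program a DMA core directly (source, destination, size, commit) for a
 * single transfer and poll its status register until the engine is idle,
 * then check the error cause register.
 */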
5876 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5877 u32 size_to_dma, dma_addr_t dma_addr)
5878 {
5879 u32 err_cause, val;
5880 u64 dma_offset;
5881 int rc;
5882
5883 dma_offset = dma_id * DMA_CORE_OFFSET;
5884
5885 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5886 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5887 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5888 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5889 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5890 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5891 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5892
5893 rc = hl_poll_timeout(
5894 hdev,
5895 mmDMA0_CORE_STS0 + dma_offset,
5896 val,
5897 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5898 0,
5899 1000000);
5900
5901 if (rc) {
5902 dev_err(hdev->dev,
5903 "DMA %d timed-out during reading of 0x%llx\n",
5904 dma_id, addr);
5905 return -EIO;
5906 }
5907
5908 /* Verify DMA is OK */
5909 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5910 if (err_cause) {
5911 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5912 dev_dbg(hdev->dev,
5913 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5914 err_cause);
5915 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5916
5917 return -EIO;
5918 }
5919
5920 return 0;
5921 }
5922
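/*
 * Read device memory into a caller-provided buffer for debugfs by bouncing
 * it through a 2MB host DMA buffer, using whichever PCI DMA engine is idle.
 * The engine's QMAN is stopped and its protection bit is raised for the
 * duration of the transfer.
 */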
5923 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5924 void *blob_addr)
5925 {
5926 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5927 u32 qm_glbl_sts0, qm_cgm_sts;
5928 u64 dma_offset, qm_offset;
5929 dma_addr_t dma_addr;
5930 void *kernel_addr;
5931 bool is_eng_idle;
5932 int rc = 0, dma_id;
5933
5934 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5935
5936 if (!kernel_addr)
5937 return -ENOMEM;
5938
5939 hdev->asic_funcs->hw_queues_lock(hdev);
5940
5941 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5942 dma_offset = dma_id * DMA_CORE_OFFSET;
5943 qm_offset = dma_id * DMA_QMAN_OFFSET;
5944 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5945 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5946 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5947 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5948 IS_DMA_IDLE(dma_core_sts0);
5949
5950 if (!is_eng_idle) {
5951 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5952 dma_offset = dma_id * DMA_CORE_OFFSET;
5953 qm_offset = dma_id * DMA_QMAN_OFFSET;
5954 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5955 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5956 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5957 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5958 IS_DMA_IDLE(dma_core_sts0);
5959
5960 if (!is_eng_idle) {
5961 dev_err_ratelimited(hdev->dev,
5962 "Can't read via DMA because it is BUSY\n");
5963 rc = -EAGAIN;
5964 goto out;
5965 }
5966 }
5967
5968 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5969 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5970 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5971
5972 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
5973 * using the compute ctx ASID, if exists. If not, use the kernel ctx
5974 * ASID
5975 */
5976 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5977
5978 /* Verify DMA is OK */
5979 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5980 if (err_cause) {
5981 dev_dbg(hdev->dev,
5982 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5983 err_cause);
5984 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5985 }
5986
5987 pos = 0;
5988 size_left = size;
5989 size_to_dma = SZ_2M;
5990
5991 while (size_left > 0) {
5992
5993 if (size_left < SZ_2M)
5994 size_to_dma = size_left;
5995
5996 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
5997 dma_addr);
5998 if (rc)
5999 break;
6000
6001 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6002
6003 if (size_left <= SZ_2M)
6004 break;
6005
6006 pos += SZ_2M;
6007 addr += SZ_2M;
6008 size_left -= SZ_2M;
6009 }
6010
6011 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6012 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6013 * ASID
6014 */
6015 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6016 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6017
6018 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6019
6020 out:
6021 hdev->asic_funcs->hw_queues_unlock(hdev);
6022
6023 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6024
6025 return rc;
6026 }
6027
6028 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6029 {
6030 struct gaudi_device *gaudi = hdev->asic_specific;
6031
6032 if (hdev->reset_info.hard_reset_pending)
6033 return U64_MAX;
6034
6035 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6036 (addr - gaudi->hbm_bar_cur_addr));
6037 }
6038
6039 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6040 {
6041 struct gaudi_device *gaudi = hdev->asic_specific;
6042
6043 if (hdev->reset_info.hard_reset_pending)
6044 return;
6045
6046 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6047 (addr - gaudi->hbm_bar_cur_addr));
6048 }
6049
6050 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6051 {
6052 /* mask to zero the MMBP and ASID bits */
6053 WREG32_AND(reg, ~0x7FF);
6054 WREG32_OR(reg, asid);
6055 }
6056
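/*
 * Program the given ASID (and clear the MMU-bypass bit) in the non-secure
 * properties and ARUSER/AWUSER registers of all DMA, TPC and MME engines,
 * and of every NIC engine whose capability bit is set, so their
 * transactions are translated by the MMU under this ASID.
 */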
6057 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6058 {
6059 struct gaudi_device *gaudi = hdev->asic_specific;
6060
6061 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6062 return;
6063
6064 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6065 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6066 return;
6067 }
6068
6069 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6070 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6071 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6072 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6073 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6074
6075 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6076 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6077 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6078 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6079 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6080
6081 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6082 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6083 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6084 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6085 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6086
6087 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6088 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6089 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6090 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6091 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6092
6093 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6094 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6095 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6096 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6097 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6098
6099 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6100 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6101 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6102 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6103 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6104
6105 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6106 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6107 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6108 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6109 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6110
6111 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6112 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6113 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6114 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6115 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6116
6117 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6118 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6119 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6120 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6121 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6122 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6123 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6124 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6125
6126 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6127 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6128 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6129 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6130 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6131 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6132 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6133
6134 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6135 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6136 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6137 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6138 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6139 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6140 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6141
6142 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6143 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6144 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6145 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6146 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6147 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6148 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6149
6150 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6151 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6152 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6153 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6154 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6155 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6156 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6157
6158 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6159 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6160 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6161 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6162 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6163 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6164 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6165
6166 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6167 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6168 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6169 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6170 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6171 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6172 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6173
6174 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6175 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6176 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6177 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6178 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6179 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6180 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6181
6182 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6183 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6184 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6185 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6186 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6187 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6188 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6189
6190 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6191 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6192 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6193 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6194 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6195 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6196 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6197 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6198 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6199 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6200
6201 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6202 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6203 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6204 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6205 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6206 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6207 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6208 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6209 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6210 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6211 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6212 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6213
6214 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6215 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6216 asid);
6217 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6218 asid);
6219 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6220 asid);
6221 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6222 asid);
6223 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6224 asid);
6225 }
6226
6227 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6228 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6229 asid);
6230 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6231 asid);
6232 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6233 asid);
6234 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6235 asid);
6236 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6237 asid);
6238 }
6239
6240 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6241 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6242 asid);
6243 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6244 asid);
6245 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6246 asid);
6247 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6248 asid);
6249 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6250 asid);
6251 }
6252
6253 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6254 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6255 asid);
6256 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6257 asid);
6258 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6259 asid);
6260 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6261 asid);
6262 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6263 asid);
6264 }
6265
6266 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6267 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6268 asid);
6269 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6270 asid);
6271 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6272 asid);
6273 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6274 asid);
6275 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6276 asid);
6277 }
6278
6279 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6280 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6281 asid);
6282 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6283 asid);
6284 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6285 asid);
6286 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6287 asid);
6288 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6289 asid);
6290 }
6291
6292 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6293 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6294 asid);
6295 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6296 asid);
6297 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6298 asid);
6299 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6300 asid);
6301 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6302 asid);
6303 }
6304
6305 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6306 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6307 asid);
6308 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6309 asid);
6310 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6311 asid);
6312 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6313 asid);
6314 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6315 asid);
6316 }
6317
6318 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6319 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6320 asid);
6321 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6322 asid);
6323 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6324 asid);
6325 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6326 asid);
6327 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6328 asid);
6329 }
6330
6331 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6332 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6333 asid);
6334 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6335 asid);
6336 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6337 asid);
6338 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6339 asid);
6340 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6341 asid);
6342 }
6343
6344 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6345 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6346 }
6347
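/*
 * Send a kernel-driver job on QMAN0: append a MSG_PROT fence packet to the
 * patched CB, temporarily open DMA channel 0 protection, submit the CB
 * without a completion entry and poll the fence value in host memory until
 * it is written or the timeout expires.
 */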
6348 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6349 struct hl_cs_job *job)
6350 {
6351 struct packet_msg_prot *fence_pkt;
6352 u32 *fence_ptr;
6353 dma_addr_t fence_dma_addr;
6354 struct hl_cb *cb;
6355 u32 tmp, timeout, dma_offset;
6356 int rc;
6357
6358 if (hdev->pldm)
6359 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6360 else
6361 timeout = HL_DEVICE_TIMEOUT_USEC;
6362
6363 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6364 if (!fence_ptr) {
6365 dev_err(hdev->dev,
6366 "Failed to allocate fence memory for QMAN0\n");
6367 return -ENOMEM;
6368 }
6369
6370 cb = job->patched_cb;
6371
6372 fence_pkt = cb->kernel_address +
6373 job->job_cb_size - sizeof(struct packet_msg_prot);
6374
6375 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6376 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6377 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6378
6379 fence_pkt->ctl = cpu_to_le32(tmp);
6380 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6381 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6382
6383 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6384
6385 WREG32(mmDMA0_CORE_PROT + dma_offset,
6386 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6387
6388 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6389 job->job_cb_size, cb->bus_address);
6390 if (rc) {
6391 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6392 goto free_fence_ptr;
6393 }
6394
6395 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6396 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6397 timeout, true);
6398
6399 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6400
6401 if (rc == -ETIMEDOUT) {
6402 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6403 goto free_fence_ptr;
6404 }
6405
6406 free_fence_ptr:
6407 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6408
6409 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6410 return rc;
6411 }
6412
6413 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6414 {
6415 if (event_type >= GAUDI_EVENT_SIZE)
6416 goto event_not_supported;
6417
6418 if (!gaudi_irq_map_table[event_type].valid)
6419 goto event_not_supported;
6420
6421 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6422
6423 return;
6424
6425 event_not_supported:
6426 snprintf(desc, size, "N/A");
6427 }
6428
6429 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6430 bool is_write, u16 *engine_id_1,
6431 u16 *engine_id_2)
6432 {
6433 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6434
6435 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6436 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6437
6438 switch (x_y) {
6439 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6440 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6441 dma_id[0] = 0;
6442 dma_id[1] = 2;
6443 break;
6444 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6445 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6446 dma_id[0] = 1;
6447 dma_id[1] = 3;
6448 break;
6449 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6450 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6451 dma_id[0] = 4;
6452 dma_id[1] = 6;
6453 break;
6454 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6455 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6456 dma_id[0] = 5;
6457 dma_id[1] = 7;
6458 break;
6459 default:
6460 goto unknown_initiator;
6461 }
6462
6463 for (i = 0 ; i < 2 ; i++) {
6464 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6465 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6466 }
6467
6468 switch (x_y) {
6469 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6470 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6471 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6472 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6473 return "DMA0";
6474 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6475 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6476 return "DMA2";
6477 } else {
6478 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6479 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6480 return "DMA0 or DMA2";
6481 }
6482 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6483 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6484 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6485 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6486 return "DMA1";
6487 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6488 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6489 return "DMA3";
6490 } else {
6491 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6492 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6493 return "DMA1 or DMA3";
6494 }
6495 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6496 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6497 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6498 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6499 return "DMA4";
6500 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6501 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6502 return "DMA6";
6503 } else {
6504 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6505 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6506 return "DMA4 or DMA6";
6507 }
6508 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6509 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6510 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6511 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6512 return "DMA5";
6513 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6514 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6515 return "DMA7";
6516 } else {
6517 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6518 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6519 return "DMA5 or DMA7";
6520 }
6521 }
6522
6523 unknown_initiator:
6524 return "unknown initiator";
6525 }
6526
6527 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6528 u16 *engine_id_1, u16 *engine_id_2)
6529 {
6530 u32 val, x_y, axi_id;
6531
6532 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6533 RREG32(mmMMU_UP_RAZWI_READ_ID);
6534 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6535 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6536 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6537 RAZWI_INITIATOR_AXI_ID_SHIFT);
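/*
 * Editorial note: the Y/X and AXI_ID fields are masked in place (not shifted
 * down), so the comparisons below use the pre-composed
 * RAZWI_INITIATOR_ID_X_Y_* and RAZWI_INITIATOR_ID_AXI_ID() constants rather
 * than raw field values.
 */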
6538
6539 switch (x_y) {
6540 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6541 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6542 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6543 return "TPC0";
6544 }
6545 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6546 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6547 return "NIC0";
6548 }
6549 break;
6550 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6551 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6552 return "TPC1";
6553 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6554 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6555 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6556 return "MME0";
6557 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6558 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6559 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6560 return "MME1";
6561 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6562 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6563 return "TPC2";
6564 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6565 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6566 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6567 return "TPC3";
6568 }
6569 /* PCI, CPU and PSOC do not have an engine id */
6570 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6571 return "PCI";
6572 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6573 return "CPU";
6574 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6575 return "PSOC";
6576 break;
6577 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6578 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6579 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6580 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6581 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6582 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6583 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6584 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6585 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6586 engine_id_1, engine_id_2);
6587 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6588 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6589 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6590 return "TPC4";
6591 }
6592 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6593 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6594 return "NIC1";
6595 }
6596 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6597 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6598 return "NIC2";
6599 }
6600 break;
6601 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6602 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6603 return "TPC5";
6604 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6605 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6606 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6607 return "MME2";
6608 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6609 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6610 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6611 return "MME3";
6612 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6613 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6614 return "TPC6";
6615 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6616 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6617 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6618 return "TPC7";
6619 }
6620 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6621 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6622 return "NIC4";
6623 }
6624 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6625 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6626 return "NIC5";
6627 }
6628 break;
6629 default:
6630 break;
6631 }
6632
6633 dev_err(hdev->dev,
6634 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6635 val,
6636 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6637 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6638 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6639 RAZWI_INITIATOR_AXI_ID_MASK);
6640
6641 return "unknown initiator";
6642 }
6643
6644 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6645 u16 *engine_id_2, bool *is_read, bool *is_write)
6646 {
6647
6648 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6649 dev_err_ratelimited(hdev->dev,
6650 "RAZWI event caused by illegal write of %s\n",
6651 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6652 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6653 *is_write = true;
6654 }
6655
6656 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6657 dev_err_ratelimited(hdev->dev,
6658 "RAZWI event caused by illegal read of %s\n",
6659 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6660 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6661 *is_read = true;
6662 }
6663 }
6664
6665 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6666 {
6667 struct gaudi_device *gaudi = hdev->asic_specific;
6668 u32 val;
6669
6670 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6671 return;
6672
6673 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6674 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6675 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6676 *addr <<= 32;
6677 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6678
6679 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6680 hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6681
6682 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6683 }
6684
6685 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6686 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6687 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6688 *addr <<= 32;
6689 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6690
6691 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6692
6693 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6694 }
6695 }
6696
6697 /*
6698 * +-------------------+------------------------------------------------------+
6699 * | Configuration Reg | Description |
6700 * | Address | |
6701 * +-------------------+------------------------------------------------------+
6702 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6703 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6704 * | |0xF34 memory wrappers 63:32 |
6705 * | |0xF38 memory wrappers 95:64 |
6706 * | |0xF3C memory wrappers 127:96 |
6707 * +-------------------+------------------------------------------------------+
6708 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6709 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6710 * | |0xF44 memory wrappers 63:32 |
6711 * | |0xF48 memory wrappers 95:64 |
6712 * | |0xF4C memory wrappers 127:96 |
6713 * +-------------------+------------------------------------------------------+
6714 */
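/*
 * Worked example (editorial, illustrative only): following the bit-scan in
 * gaudi_extract_ecc_info() below (__ffs() of each register plus 32 * the
 * register index), a single-bit error in memory wrapper 40 would show up as
 * bit 8 (40 - 32) of the second indication register, i.e. offset 0xF34 for
 * SERR or 0xF44 for DERR.
 */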
6715 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6716 struct ecc_info_extract_params *params, u64 *ecc_address,
6717 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6718 {
6719 u32 i, num_mem_regs, reg, err_bit;
6720 u64 err_addr, err_word = 0;
6721
6722 num_mem_regs = params->num_memories / 32 +
6723 ((params->num_memories % 32) ? 1 : 0);
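/*
 * Editorial example of the ceiling division above: with
 * params->num_memories == 90, num_mem_regs == 90 / 32 + 1 == 3.
 */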
6724
6725 if (params->block_address >= CFG_BASE)
6726 params->block_address -= CFG_BASE;
6727
6728 if (params->derr)
6729 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6730 else
6731 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6732
6733 /* Set invalid wrapper index */
6734 *memory_wrapper_idx = 0xFF;
6735
6736 /* Iterate through memory wrappers, a single bit must be set */
6737 for (i = 0 ; i < num_mem_regs ; i++) {
6738 err_addr += i * 4;
6739 err_word = RREG32(err_addr);
6740 if (err_word) {
6741 err_bit = __ffs(err_word);
6742 *memory_wrapper_idx = err_bit + (32 * i);
6743 break;
6744 }
6745 }
6746
6747 if (*memory_wrapper_idx == 0xFF) {
6748 dev_err(hdev->dev, "ECC error information cannot be found\n");
6749 return -EINVAL;
6750 }
6751
6752 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6753 *memory_wrapper_idx);
6754
6755 *ecc_address =
6756 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6757 *ecc_syndrom =
6758 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6759
6760 /* Clear error indication */
6761 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6762 if (params->derr)
6763 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6764 else
6765 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6766
6767 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6768
6769 return 0;
6770 }
6771
6772 /*
6773 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6774 *
6775 * @idx: the current pi/ci value
6776 * @q_len: the queue length (power of 2)
6777 *
6778 * @return the cyclically decremented index
6779 */
6780 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6781 {
6782 u32 mask = q_len - 1;
6783
6784 /*
6785 * Modular decrement is equivalent to adding (queue_size - 1);
6786 * we then take the LSBs to make sure the value stays in the
6787 * range [0, queue_len - 1]
6788 */
6789 return (idx + q_len - 1) & mask;
6790 }
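/*
 * Worked example (editorial, illustrative only): with q_len == 8 (mask == 7),
 *   gaudi_queue_idx_dec(0, 8) == (0 + 8 - 1) & 7 == 7 (wraps to the end)
 *   gaudi_queue_idx_dec(5, 8) == (5 + 8 - 1) & 7 == 4
 */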
6791
6792 /**
6793 * gaudi_handle_sw_config_stream_data - print SW config stream data
6794 *
6795 * @hdev: pointer to the habanalabs device structure
6796 * @stream: the QMAN's stream
6797 * @qman_base: base address of QMAN registers block
6798 * @event_mask: mask of the last events occurred
6799 */
6800 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6801 u64 qman_base, u64 event_mask)
6802 {
6803 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6804 u32 cq_ptr_lo_off, size;
6805
6806 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6807
6808 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6809 stream * cq_ptr_lo_off;
6810 cq_ptr_hi = cq_ptr_lo +
6811 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6812 cq_tsize = cq_ptr_lo +
6813 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
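/*
 * Editorial note: cq_ptr_lo_off is the per-stream register stride
 * (CQ_PTR_LO_1 - CQ_PTR_LO_0), so for example stream 2 reads its CQ pointer
 * and size from (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 2 * cq_ptr_lo_off
 * relative to qman_base.
 */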
6814
6815 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6816 size = RREG32(cq_tsize);
6817 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6818 stream, cq_ptr, size);
6819
6820 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6821 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6822 hdev->captured_err_info.undef_opcode.cq_size = size;
6823 hdev->captured_err_info.undef_opcode.stream_id = stream;
6824 }
6825 }
6826
6827 /**
6828 * gaudi_handle_last_pqes_on_err - print last PQEs on error
6829 *
6830 * @hdev: pointer to the habanalabs device structure
6831 * @qid_base: first QID of the QMAN (out of 4 streams)
6832 * @stream: the QMAN's stream
6833 * @qman_base: base address of QMAN registers block
6834 * @event_mask: mask of the last events occurred
6835 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6836 */
6837 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6838 u32 stream, u64 qman_base,
6839 u64 event_mask,
6840 bool pr_sw_conf)
6841 {
6842 u32 ci, qm_ci_stream_off, queue_len;
6843 struct hl_hw_queue *q;
6844 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6845 int i;
6846
6847 q = &hdev->kernel_queues[qid_base + stream];
6848
6849 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6850 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6851 stream * qm_ci_stream_off;
6852
6853 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6854 q->int_queue_len : HL_QUEUE_LENGTH;
6855
6856 hdev->asic_funcs->hw_queues_lock(hdev);
6857
6858 if (pr_sw_conf)
6859 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6860
6861 ci = RREG32(pq_ci);
6862
6863 /* we should start printing from ci - 1 */
6864 ci = gaudi_queue_idx_dec(ci, queue_len);
6865 memset(addr, 0, sizeof(addr));
6866
6867 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6868 struct hl_bd *bd;
6869 u32 len;
6870
6871 bd = q->kernel_address;
6872 bd += ci;
6873
6874 len = le32_to_cpu(bd->len);
6875 /* len 0 means an uninitialized entry - break */
6876 if (!len)
6877 break;
6878
6879 addr[i] = le64_to_cpu(bd->ptr);
6880
6881 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6882 stream, ci, addr[i], len);
6883
6884 /* get previous ci, wrap if needed */
6885 ci = gaudi_queue_idx_dec(ci, queue_len);
6886 }
6887
6888 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6889 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6890 u32 arr_idx = undef_opcode->cb_addr_streams_len;
6891
6892 if (arr_idx == 0) {
6893 undef_opcode->timestamp = ktime_get();
6894 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6895 }
6896
6897 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6898 undef_opcode->cb_addr_streams_len++;
6899 }
6900
6901 hdev->asic_funcs->hw_queues_unlock(hdev);
6902 }
6903
6904 /**
6905 * handle_qman_data_on_err - extract QMAN data on error
6906 *
6907 * @hdev: pointer to the habanalabs device structure
6908 * @qid_base: first QID of the QMAN (out of 4 streams)
6909 * @stream: the QMAN's stream
6910 * @qman_base: base address of QMAN registers block
6911 * @event_mask: mask of the last events occurred
6912 *
6913 * This function attempts to extract as much data as possible on a QMAN error.
6914 * For an upper CP, print the SW config stream data and the last 8 PQEs.
6915 * For the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
6916 */
6917 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6918 u32 stream, u64 qman_base, u64 event_mask)
6919 {
6920 u32 i;
6921
6922 if (stream != QMAN_STREAMS) {
6923 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6924 qman_base, event_mask, true);
6925 return;
6926 }
6927
6928 /* handle Lower-CP */
6929 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6930
6931 for (i = 0; i < QMAN_STREAMS; i++)
6932 gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6933 qman_base, event_mask, false);
6934 }
6935
6936 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6937 const char *qm_name,
6938 u64 qman_base,
6939 u32 qid_base,
6940 u64 *event_mask)
6941 {
6942 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6943 u64 glbl_sts_addr, arb_err_addr;
6944 char reg_desc[32];
6945
6946 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6947 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6948
6949 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6950 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6951 glbl_sts_clr_val = 0;
6952 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6953
6954 if (!glbl_sts_val)
6955 continue;
6956
6957 if (i == QMAN_STREAMS)
6958 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6959 else
6960 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6961
6962 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6963 if (glbl_sts_val & BIT(j)) {
6964 dev_err_ratelimited(hdev->dev,
6965 "%s %s. err cause: %s\n",
6966 qm_name, reg_desc,
6967 gaudi_qman_error_cause[j]);
6968 glbl_sts_clr_val |= BIT(j);
6969 }
6970 }
6971 /* check for undefined opcode */
6972 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6973 hdev->captured_err_info.undef_opcode.write_enable) {
6974 memset(&hdev->captured_err_info.undef_opcode, 0,
6975 sizeof(hdev->captured_err_info.undef_opcode));
6976
6977 hdev->captured_err_info.undef_opcode.write_enable = false;
6978 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6979 }
6980
6981 /* Write 1 to clear errors */
6982 if (!hdev->stop_on_err)
6983 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6984 else
6985 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6986 }
6987
6988 arb_err_val = RREG32(arb_err_addr);
6989
6990 if (!arb_err_val)
6991 return;
6992
6993 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6994 if (arb_err_val & BIT(j)) {
6995 dev_err_ratelimited(hdev->dev,
6996 "%s ARB_ERR. err cause: %s\n",
6997 qm_name,
6998 gaudi_qman_arb_error_cause[j]);
6999 }
7000 }
7001 }
7002
7003 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7004 struct hl_eq_sm_sei_data *sei_data)
7005 {
7006 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7007
7008 /* Flip the bits as the enum is ordered in the opposite way */
7009 index = (index ^ 0x3) & 0x3;
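/* Editorial note: the flip above maps index 0 <-> 3 and 1 <-> 2 */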
7010
7011 switch (sei_data->sei_cause) {
7012 case SM_SEI_SO_OVERFLOW:
7013 dev_err_ratelimited(hdev->dev,
7014 "%s SEI Error: SOB Group %u overflow/underflow",
7015 gaudi_sync_manager_names[index],
7016 le32_to_cpu(sei_data->sei_log));
7017 break;
7018 case SM_SEI_LBW_4B_UNALIGNED:
7019 dev_err_ratelimited(hdev->dev,
7020 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7021 gaudi_sync_manager_names[index],
7022 le32_to_cpu(sei_data->sei_log));
7023 break;
7024 case SM_SEI_AXI_RESPONSE_ERR:
7025 dev_err_ratelimited(hdev->dev,
7026 "%s SEI Error: AXI ID %u response error",
7027 gaudi_sync_manager_names[index],
7028 le32_to_cpu(sei_data->sei_log));
7029 break;
7030 default:
7031 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7032 le32_to_cpu(sei_data->sei_log));
7033 break;
7034 }
7035 }
7036
7037 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7038 struct hl_eq_ecc_data *ecc_data)
7039 {
7040 struct ecc_info_extract_params params;
7041 u64 ecc_address = 0, ecc_syndrom = 0;
7042 u8 index, memory_wrapper_idx = 0;
7043 bool extract_info_from_fw;
7044 int rc;
7045
7046 if (hdev->asic_prop.fw_security_enabled) {
7047 extract_info_from_fw = true;
7048 goto extract_ecc_info;
7049 }
7050
7051 switch (event_type) {
7052 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7053 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7054 extract_info_from_fw = true;
7055 break;
7056 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7057 index = event_type - GAUDI_EVENT_TPC0_SERR;
7058 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7059 params.num_memories = 90;
7060 params.derr = false;
7061 extract_info_from_fw = false;
7062 break;
7063 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7064 index = event_type - GAUDI_EVENT_TPC0_DERR;
7065 params.block_address =
7066 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7067 params.num_memories = 90;
7068 params.derr = true;
7069 extract_info_from_fw = false;
7070 break;
7071 case GAUDI_EVENT_MME0_ACC_SERR:
7072 case GAUDI_EVENT_MME1_ACC_SERR:
7073 case GAUDI_EVENT_MME2_ACC_SERR:
7074 case GAUDI_EVENT_MME3_ACC_SERR:
7075 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7076 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7077 params.num_memories = 128;
7078 params.derr = false;
7079 extract_info_from_fw = false;
7080 break;
7081 case GAUDI_EVENT_MME0_ACC_DERR:
7082 case GAUDI_EVENT_MME1_ACC_DERR:
7083 case GAUDI_EVENT_MME2_ACC_DERR:
7084 case GAUDI_EVENT_MME3_ACC_DERR:
7085 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7086 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7087 params.num_memories = 128;
7088 params.derr = true;
7089 extract_info_from_fw = false;
7090 break;
7091 case GAUDI_EVENT_MME0_SBAB_SERR:
7092 case GAUDI_EVENT_MME1_SBAB_SERR:
7093 case GAUDI_EVENT_MME2_SBAB_SERR:
7094 case GAUDI_EVENT_MME3_SBAB_SERR:
7095 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7096 params.block_address =
7097 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7098 params.num_memories = 33;
7099 params.derr = false;
7100 extract_info_from_fw = false;
7101 break;
7102 case GAUDI_EVENT_MME0_SBAB_DERR:
7103 case GAUDI_EVENT_MME1_SBAB_DERR:
7104 case GAUDI_EVENT_MME2_SBAB_DERR:
7105 case GAUDI_EVENT_MME3_SBAB_DERR:
7106 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7107 params.block_address =
7108 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7109 params.num_memories = 33;
7110 params.derr = true;
7111 extract_info_from_fw = false;
7112 break;
7113 default:
7114 return;
7115 }
7116
7117 extract_ecc_info:
7118 if (extract_info_from_fw) {
7119 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7120 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7121 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7122 } else {
7123 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7124 &ecc_syndrom, &memory_wrapper_idx);
7125 if (rc)
7126 return;
7127 }
7128
7129 dev_err(hdev->dev,
7130 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7131 ecc_address, ecc_syndrom, memory_wrapper_idx);
7132 }
7133
7134 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7135 {
7136 u64 qman_base;
7137 char desc[32];
7138 u32 qid_base;
7139 u8 index;
7140
7141 switch (event_type) {
7142 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7143 index = event_type - GAUDI_EVENT_TPC0_QM;
7144 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7145 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7146 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7147 break;
7148 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7149 if (event_type == GAUDI_EVENT_MME0_QM) {
7150 index = 0;
7151 qid_base = GAUDI_QUEUE_ID_MME_0_0;
7152 } else { /* event_type == GAUDI_EVENT_MME2_QM */
7153 index = 2;
7154 qid_base = GAUDI_QUEUE_ID_MME_1_0;
7155 }
7156 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7157 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7158 break;
7159 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7160 index = event_type - GAUDI_EVENT_DMA0_QM;
7161 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7162 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7163 if (index > 1)
7164 qid_base++;
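/*
 * Editorial example (assuming GAUDI_QUEUE_ID_CPU_PQ sits between the DMA1
 * and DMA2 queue ID ranges, which is why indices above 1 are bumped): for
 * GAUDI_EVENT_DMA5_QM, index == 5 and qid_base ends up as
 * GAUDI_QUEUE_ID_DMA_0_0 + 5 * QMAN_STREAMS + 1.
 */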
7165 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7166 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7167 break;
7168 case GAUDI_EVENT_NIC0_QM0:
7169 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7170 qman_base = mmNIC0_QM0_BASE;
7171 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7172 break;
7173 case GAUDI_EVENT_NIC0_QM1:
7174 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7175 qman_base = mmNIC0_QM1_BASE;
7176 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7177 break;
7178 case GAUDI_EVENT_NIC1_QM0:
7179 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7180 qman_base = mmNIC1_QM0_BASE;
7181 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7182 break;
7183 case GAUDI_EVENT_NIC1_QM1:
7184 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7185 qman_base = mmNIC1_QM1_BASE;
7186 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7187 break;
7188 case GAUDI_EVENT_NIC2_QM0:
7189 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7190 qman_base = mmNIC2_QM0_BASE;
7191 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7192 break;
7193 case GAUDI_EVENT_NIC2_QM1:
7194 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7195 qman_base = mmNIC2_QM1_BASE;
7196 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7197 break;
7198 case GAUDI_EVENT_NIC3_QM0:
7199 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7200 qman_base = mmNIC3_QM0_BASE;
7201 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7202 break;
7203 case GAUDI_EVENT_NIC3_QM1:
7204 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7205 qman_base = mmNIC3_QM1_BASE;
7206 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7207 break;
7208 case GAUDI_EVENT_NIC4_QM0:
7209 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7210 qman_base = mmNIC4_QM0_BASE;
7211 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7212 break;
7213 case GAUDI_EVENT_NIC4_QM1:
7214 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7215 qman_base = mmNIC4_QM1_BASE;
7216 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7217 break;
7218 default:
7219 return;
7220 }
7221
7222 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7223 }
7224
7225 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7226 bool check_razwi, u64 *event_mask)
7227 {
7228 bool is_read = false, is_write = false;
7229 u16 engine_id[2], num_of_razwi_eng = 0;
7230 char desc[64] = "";
7231 u64 razwi_addr = 0;
7232 u8 razwi_flags = 0;
7233
7234 /*
7235 * Initialize the engine ids as not valid by default; they get a valid value only if the
7236 * RAZWI was initiated by an engine that has an engine id.
7237 */
7238 engine_id[0] = HL_RAZWI_NA_ENG_ID;
7239 engine_id[1] = HL_RAZWI_NA_ENG_ID;
7240
7241 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7242 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7243 event_type, desc);
7244
7245 if (check_razwi) {
7246 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7247 &is_write);
7248 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7249
7250 if (is_read)
7251 razwi_flags |= HL_RAZWI_READ;
7252 if (is_write)
7253 razwi_flags |= HL_RAZWI_WRITE;
7254
7255 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7256 if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7257 num_of_razwi_eng = 2;
7258 else
7259 num_of_razwi_eng = 1;
7260 }
7261
7262 if (razwi_flags)
7263 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7264 razwi_flags, event_mask);
7265 }
7266 }
7267
7268 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7269 struct cpucp_pkt_sync_err *sync_err)
7270 {
7271 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7272
7273 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7274 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7275 }
7276
7277 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7278 struct hl_eq_fw_alive *fw_alive)
7279 {
7280 dev_err(hdev->dev,
7281 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7282 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7283 le32_to_cpu(fw_alive->process_id),
7284 le32_to_cpu(fw_alive->thread_id),
7285 le64_to_cpu(fw_alive->uptime_seconds));
7286 }
7287
7288 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7289 void *data)
7290 {
7291 char desc[64] = "", *type;
7292 struct eq_nic_sei_event *eq_nic_sei = data;
7293 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7294
7295 switch (eq_nic_sei->axi_error_cause) {
7296 case RXB:
7297 type = "RXB";
7298 break;
7299 case RXE:
7300 type = "RXE";
7301 break;
7302 case TXS:
7303 type = "TXS";
7304 break;
7305 case TXE:
7306 type = "TXE";
7307 break;
7308 case QPC_RESP:
7309 type = "QPC_RESP";
7310 break;
7311 case NON_AXI_ERR:
7312 type = "NON_AXI_ERR";
7313 break;
7314 case TMR:
7315 type = "TMR";
7316 break;
7317 default:
7318 dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7319 eq_nic_sei->axi_error_cause);
7320 type = "N/A";
7321 break;
7322 }
7323
7324 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7325 eq_nic_sei->id);
7326 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7327 event_type, desc);
7328 }
7329
7330 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7331 {
7332 /* GAUDI doesn't support any reset except hard-reset */
7333 return -EPERM;
7334 }
7335
7336 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7337 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7338 {
7339 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7340 int rc = 0;
7341
7342 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7343 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7344 if (!hbm_ecc_data) {
7345 dev_err(hdev->dev, "No FW ECC data");
7346 return 0;
7347 }
7348
7349 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7350 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7351 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7352 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7353 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7354 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7355 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7356 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7357 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7358 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7359 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7360 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7361 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7362 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7363
7364 dev_err(hdev->dev,
7365 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7366 device, ch, wr_par, rd_par, ca_par, serr, derr);
7367 dev_err(hdev->dev,
7368 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7369 device, ch, hbm_ecc_data->first_addr, type,
7370 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7371 hbm_ecc_data->dec_cnt);
7372 return 0;
7373 }
7374
7375 if (hdev->asic_prop.fw_security_enabled) {
7376 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7377 return 0;
7378 }
7379
7380 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7381 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7382 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7383 val = (val & 0xFF) | ((val >> 8) & 0xFF);
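/*
 * Editorial note: the low and high bytes of the masked read are OR-ed
 * together (e.g. 0x0300 becomes 0x03), so the check below fires if an
 * interrupt bit is set in either byte.
 */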
7384 if (val) {
7385 rc = -EIO;
7386 dev_err(hdev->dev,
7387 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7388 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7389 (val >> 2) & 0x1, (val >> 3) & 0x1,
7390 (val >> 4) & 0x1);
7391
7392 val2 = RREG32(base + ch * 0x1000 + 0x060);
7393 dev_err(hdev->dev,
7394 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7395 device, ch * 2,
7396 RREG32(base + ch * 0x1000 + 0x064),
7397 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7398 (val2 & 0xFF0000) >> 16,
7399 (val2 & 0xFF000000) >> 24);
7400 }
7401
7402 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7403 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7404 if (val) {
7405 rc = -EIO;
7406 dev_err(hdev->dev,
7407 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7408 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7409 (val >> 2) & 0x1, (val >> 3) & 0x1,
7410 (val >> 4) & 0x1);
7411
7412 val2 = RREG32(base + ch * 0x1000 + 0x070);
7413 dev_err(hdev->dev,
7414 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7415 device, ch * 2 + 1,
7416 RREG32(base + ch * 0x1000 + 0x074),
7417 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7418 (val2 & 0xFF0000) >> 16,
7419 (val2 & 0xFF000000) >> 24);
7420 }
7421
7422 /* Clear interrupts */
7423 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7424 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7425 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7426 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7427 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7428 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7429 }
7430
7431 val = RREG32(base + 0x8F30);
7432 val2 = RREG32(base + 0x8F34);
7433 if (val | val2) {
7434 rc = -EIO;
7435 dev_err(hdev->dev,
7436 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7437 device, val, val2);
7438 }
7439 val = RREG32(base + 0x8F40);
7440 val2 = RREG32(base + 0x8F44);
7441 if (val | val2) {
7442 rc = -EIO;
7443 dev_err(hdev->dev,
7444 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7445 device, val, val2);
7446 }
7447
7448 return rc;
7449 }
7450
7451 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7452 {
7453 switch (hbm_event_type) {
7454 case GAUDI_EVENT_HBM0_SPI_0:
7455 case GAUDI_EVENT_HBM0_SPI_1:
7456 return 0;
7457 case GAUDI_EVENT_HBM1_SPI_0:
7458 case GAUDI_EVENT_HBM1_SPI_1:
7459 return 1;
7460 case GAUDI_EVENT_HBM2_SPI_0:
7461 case GAUDI_EVENT_HBM2_SPI_1:
7462 return 2;
7463 case GAUDI_EVENT_HBM3_SPI_0:
7464 case GAUDI_EVENT_HBM3_SPI_1:
7465 return 3;
7466 default:
7467 break;
7468 }
7469
7470 /* Should never happen */
7471 return 0;
7472 }
7473
7474 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7475 char *interrupt_name)
7476 {
7477 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7478 bool soft_reset_required = false;
7479
7480 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7481 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7482
7483 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7484 if (tpc_interrupts_cause & BIT(i)) {
7485 dev_err_ratelimited(hdev->dev,
7486 "TPC%d_%s interrupt cause: %s\n",
7487 tpc_id, interrupt_name,
7488 gaudi_tpc_interrupts_cause[i]);
7489 /* If this is a QM error, we need to soft-reset */
7490 if (i == 15)
7491 soft_reset_required = true;
7492 }
7493
7494 /* Clear interrupts */
7495 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7496
7497 return soft_reset_required;
7498 }
7499
7500 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7501 {
7502 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7503 }
7504
7505 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7506 {
7507 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7508 }
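/*
 * Editorial note on the two helpers above (based only on the divisors used):
 * TPC DEC events appear to be spaced 2 apart per TPC and KRN_ERR events
 * 6 apart, so e.g. an event offset of 6 from GAUDI_EVENT_TPC0_DEC maps to
 * TPC 3 (6 >> 1), while an offset of 12 from GAUDI_EVENT_TPC0_KRN_ERR maps
 * to TPC 2 (12 / 6).
 */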
7509
7510 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7511 {
7512 ktime_t zero_time = ktime_set(0, 0);
7513
7514 mutex_lock(&hdev->clk_throttling.lock);
7515
7516 switch (event_type) {
7517 case GAUDI_EVENT_FIX_POWER_ENV_S:
7518 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7519 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7520 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7521 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7522 dev_info_ratelimited(hdev->dev,
7523 "Clock throttling due to power consumption\n");
7524 break;
7525
7526 case GAUDI_EVENT_FIX_POWER_ENV_E:
7527 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7528 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7529 dev_info_ratelimited(hdev->dev,
7530 "Power envelop is safe, back to optimal clock\n");
7531 break;
7532
7533 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7534 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7535 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7536 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7537 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7538 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7539 dev_info_ratelimited(hdev->dev,
7540 "Clock throttling due to overheating\n");
7541 break;
7542
7543 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7544 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7545 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7546 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7547 dev_info_ratelimited(hdev->dev,
7548 "Thermal envelop is safe, back to optimal clock\n");
7549 break;
7550
7551 default:
7552 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7553 event_type);
7554 break;
7555 }
7556
7557 mutex_unlock(&hdev->clk_throttling.lock);
7558 }
7559
7560 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7561 {
7562 struct gaudi_device *gaudi = hdev->asic_specific;
7563 struct hl_info_fw_err_info fw_err_info;
7564 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7565 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7566 u32 fw_fatal_err_flag = 0, flags = 0;
7567 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7568 >> EQ_CTL_EVENT_TYPE_SHIFT);
7569 bool reset_required, reset_direct = false;
7570 u8 cause;
7571 int rc;
7572
7573 if (event_type >= GAUDI_EVENT_SIZE) {
7574 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7575 event_type, GAUDI_EVENT_SIZE - 1);
7576 return;
7577 }
7578
7579 gaudi->events_stat[event_type]++;
7580 gaudi->events_stat_aggregate[event_type]++;
7581
7582 switch (event_type) {
7583 case GAUDI_EVENT_PCIE_CORE_DERR:
7584 case GAUDI_EVENT_PCIE_IF_DERR:
7585 case GAUDI_EVENT_PCIE_PHY_DERR:
7586 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7587 case GAUDI_EVENT_MME0_ACC_DERR:
7588 case GAUDI_EVENT_MME0_SBAB_DERR:
7589 case GAUDI_EVENT_MME1_ACC_DERR:
7590 case GAUDI_EVENT_MME1_SBAB_DERR:
7591 case GAUDI_EVENT_MME2_ACC_DERR:
7592 case GAUDI_EVENT_MME2_SBAB_DERR:
7593 case GAUDI_EVENT_MME3_ACC_DERR:
7594 case GAUDI_EVENT_MME3_SBAB_DERR:
7595 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7596 fallthrough;
7597 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7598 case GAUDI_EVENT_PSOC_MEM_DERR:
7599 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7600 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7601 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7602 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7603 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7604 case GAUDI_EVENT_MMU_DERR:
7605 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7606 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7607 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7608 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7609 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7610 goto reset_device;
7611
7612 case GAUDI_EVENT_GIC500:
7613 case GAUDI_EVENT_AXI_ECC:
7614 case GAUDI_EVENT_L2_RAM_ECC:
7615 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7616 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7617 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7618 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7619 goto reset_device;
7620
7621 case GAUDI_EVENT_HBM0_SPI_0:
7622 case GAUDI_EVENT_HBM1_SPI_0:
7623 case GAUDI_EVENT_HBM2_SPI_0:
7624 case GAUDI_EVENT_HBM3_SPI_0:
7625 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7626 gaudi_hbm_read_interrupts(hdev,
7627 gaudi_hbm_event_to_dev(event_type),
7628 &eq_entry->hbm_ecc_data);
7629 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7630 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7631 goto reset_device;
7632
7633 case GAUDI_EVENT_HBM0_SPI_1:
7634 case GAUDI_EVENT_HBM1_SPI_1:
7635 case GAUDI_EVENT_HBM2_SPI_1:
7636 case GAUDI_EVENT_HBM3_SPI_1:
7637 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7638 gaudi_hbm_read_interrupts(hdev,
7639 gaudi_hbm_event_to_dev(event_type),
7640 &eq_entry->hbm_ecc_data);
7641 hl_fw_unmask_irq(hdev, event_type);
7642 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7643 break;
7644
7645 case GAUDI_EVENT_TPC0_DEC:
7646 case GAUDI_EVENT_TPC1_DEC:
7647 case GAUDI_EVENT_TPC2_DEC:
7648 case GAUDI_EVENT_TPC3_DEC:
7649 case GAUDI_EVENT_TPC4_DEC:
7650 case GAUDI_EVENT_TPC5_DEC:
7651 case GAUDI_EVENT_TPC6_DEC:
7652 case GAUDI_EVENT_TPC7_DEC:
7653 /* On a TPC DEC event, notify on a TPC assertion. While there isn't
7654 * a dedicated event for assertions yet, the FW generates a TPC DEC event.
7655 * The SW upper layer will inspect an internally mapped area to determine
7656 * whether the event is a TPC assertion or a "real" TPC DEC.
7657 */
7658 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7659 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7660 reset_required = gaudi_tpc_read_interrupts(hdev,
7661 tpc_dec_event_to_tpc_id(event_type),
7662 "AXI_SLV_DEC_Error");
7663 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7664 if (reset_required) {
7665 dev_err(hdev->dev, "reset required due to %s\n",
7666 gaudi_irq_map_table[event_type].name);
7667
7668 reset_direct = true;
7669 goto reset_device;
7670 } else {
7671 hl_fw_unmask_irq(hdev, event_type);
7672 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7673 }
7674 break;
7675
7676 case GAUDI_EVENT_TPC0_KRN_ERR:
7677 case GAUDI_EVENT_TPC1_KRN_ERR:
7678 case GAUDI_EVENT_TPC2_KRN_ERR:
7679 case GAUDI_EVENT_TPC3_KRN_ERR:
7680 case GAUDI_EVENT_TPC4_KRN_ERR:
7681 case GAUDI_EVENT_TPC5_KRN_ERR:
7682 case GAUDI_EVENT_TPC6_KRN_ERR:
7683 case GAUDI_EVENT_TPC7_KRN_ERR:
7684 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7685 reset_required = gaudi_tpc_read_interrupts(hdev,
7686 tpc_krn_event_to_tpc_id(event_type),
7687 "KRN_ERR");
7688 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7689 if (reset_required) {
7690 dev_err(hdev->dev, "reset required due to %s\n",
7691 gaudi_irq_map_table[event_type].name);
7692
7693 reset_direct = true;
7694 goto reset_device;
7695 } else {
7696 hl_fw_unmask_irq(hdev, event_type);
7697 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7698 }
7699 break;
7700
7701 case GAUDI_EVENT_PCIE_CORE_SERR:
7702 case GAUDI_EVENT_PCIE_IF_SERR:
7703 case GAUDI_EVENT_PCIE_PHY_SERR:
7704 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7705 case GAUDI_EVENT_MME0_ACC_SERR:
7706 case GAUDI_EVENT_MME0_SBAB_SERR:
7707 case GAUDI_EVENT_MME1_ACC_SERR:
7708 case GAUDI_EVENT_MME1_SBAB_SERR:
7709 case GAUDI_EVENT_MME2_ACC_SERR:
7710 case GAUDI_EVENT_MME2_SBAB_SERR:
7711 case GAUDI_EVENT_MME3_ACC_SERR:
7712 case GAUDI_EVENT_MME3_SBAB_SERR:
7713 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7714 case GAUDI_EVENT_CPU_IF_ECC_SERR:
7715 case GAUDI_EVENT_PSOC_MEM_SERR:
7716 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7717 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7718 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7719 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7720 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7721 fallthrough;
7722 case GAUDI_EVENT_MMU_SERR:
7723 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7724 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7725 hl_fw_unmask_irq(hdev, event_type);
7726 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7727 break;
7728
7729 case GAUDI_EVENT_PCIE_DEC:
7730 case GAUDI_EVENT_CPU_AXI_SPLITTER:
7731 case GAUDI_EVENT_PSOC_AXI_DEC:
7732 case GAUDI_EVENT_PSOC_PRSTN_FALL:
7733 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7734 hl_fw_unmask_irq(hdev, event_type);
7735 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7736 break;
7737
7738 case GAUDI_EVENT_MMU_PAGE_FAULT:
7739 case GAUDI_EVENT_MMU_WR_PERM:
7740 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7741 hl_fw_unmask_irq(hdev, event_type);
7742 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7743 break;
7744
7745 case GAUDI_EVENT_MME0_WBC_RSP:
7746 case GAUDI_EVENT_MME0_SBAB0_RSP:
7747 case GAUDI_EVENT_MME1_WBC_RSP:
7748 case GAUDI_EVENT_MME1_SBAB0_RSP:
7749 case GAUDI_EVENT_MME2_WBC_RSP:
7750 case GAUDI_EVENT_MME2_SBAB0_RSP:
7751 case GAUDI_EVENT_MME3_WBC_RSP:
7752 case GAUDI_EVENT_MME3_SBAB0_RSP:
7753 case GAUDI_EVENT_RAZWI_OR_ADC:
7754 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7755 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7756 fallthrough;
7757 case GAUDI_EVENT_NIC0_QM0:
7758 case GAUDI_EVENT_NIC0_QM1:
7759 case GAUDI_EVENT_NIC1_QM0:
7760 case GAUDI_EVENT_NIC1_QM1:
7761 case GAUDI_EVENT_NIC2_QM0:
7762 case GAUDI_EVENT_NIC2_QM1:
7763 case GAUDI_EVENT_NIC3_QM0:
7764 case GAUDI_EVENT_NIC3_QM1:
7765 case GAUDI_EVENT_NIC4_QM0:
7766 case GAUDI_EVENT_NIC4_QM1:
7767 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7768 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7769 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7770 gaudi_handle_qman_err(hdev, event_type, &event_mask);
7771 hl_fw_unmask_irq(hdev, event_type);
7772 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7773 break;
7774
7775 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7776 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7777 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7778 goto reset_device;
7779
7780 case GAUDI_EVENT_TPC0_BMON_SPMU:
7781 case GAUDI_EVENT_TPC1_BMON_SPMU:
7782 case GAUDI_EVENT_TPC2_BMON_SPMU:
7783 case GAUDI_EVENT_TPC3_BMON_SPMU:
7784 case GAUDI_EVENT_TPC4_BMON_SPMU:
7785 case GAUDI_EVENT_TPC5_BMON_SPMU:
7786 case GAUDI_EVENT_TPC6_BMON_SPMU:
7787 case GAUDI_EVENT_TPC7_BMON_SPMU:
7788 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7789 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7790 hl_fw_unmask_irq(hdev, event_type);
7791 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7792 break;
7793
7794 case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7795 gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7796 hl_fw_unmask_irq(hdev, event_type);
7797 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7798 break;
7799
7800 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7801 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7802 gaudi_print_sm_sei_info(hdev, event_type,
7803 &eq_entry->sm_sei_data);
7804 rc = hl_state_dump(hdev);
7805 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7806 if (rc)
7807 dev_err(hdev->dev,
7808 "Error during system state dump %d\n", rc);
7809 hl_fw_unmask_irq(hdev, event_type);
7810 break;
7811
7812 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7813 break;
7814
7815 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7816 gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7817 hl_fw_unmask_irq(hdev, event_type);
7818 break;
7819
7820 case GAUDI_EVENT_PSOC_GPIO_U16_0:
7821 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7822 dev_err(hdev->dev,
7823 "Received high temp H/W interrupt %d (cause %d)\n",
7824 event_type, cause);
7825 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7826 break;
7827
7828 case GAUDI_EVENT_DEV_RESET_REQ:
7829 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7830 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7831 goto reset_device;
7832
7833 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7834 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7835 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7836 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7837 goto reset_device;
7838
7839 case GAUDI_EVENT_FW_ALIVE_S:
7840 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7841 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7842 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7843 fw_err_info.event_id = event_type;
7844 fw_err_info.event_mask = &event_mask;
7845 hl_handle_fw_err(hdev, &fw_err_info);
7846 goto reset_device;
7847
7848 default:
7849 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7850 event_type);
7851 break;
7852 }
7853
7854 if (event_mask)
7855 hl_notifier_event_send_all(hdev, event_mask);
7856
7857 return;
7858
7859 reset_device:
7860 reset_required = true;
7861
7862 if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7863 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7864
7865 /* notify on device unavailable while the reset is triggered by FW */
7866 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7867 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7868 } else if (hdev->hard_reset_on_fw_events) {
7869 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7870 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7871 } else {
7872 reset_required = false;
7873 }
7874
7875 if (reset_required) {
7876 /* escalate general hw errors to critical/fatal error */
7877 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7878 hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7879
7880 hl_device_cond_reset(hdev, flags, event_mask);
7881 } else {
7882 hl_fw_unmask_irq(hdev, event_type);
7883 /* A notification on the event still needs to be sent even though the reset is not executed */
7884 if (event_mask)
7885 hl_notifier_event_send_all(hdev, event_mask);
7886 }
7887 }
7888
7889 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7890 {
7891 struct gaudi_device *gaudi = hdev->asic_specific;
7892
7893 if (aggregate) {
7894 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7895 return gaudi->events_stat_aggregate;
7896 }
7897
7898 *size = (u32) sizeof(gaudi->events_stat);
7899 return gaudi->events_stat;
7900 }
7901
7902 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7903 {
7904 struct gaudi_device *gaudi = hdev->asic_specific;
7905 u32 status, timeout_usec;
7906 int rc;
7907
7908 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7909 hdev->reset_info.hard_reset_pending)
7910 return 0;
7911
7912 if (hdev->pldm)
7913 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7914 else
7915 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7916
7917 /* L0 & L1 invalidation */
7918 WREG32(mmSTLB_INV_PS, 3);
7919 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7920 WREG32(mmSTLB_INV_PS, 2);
7921
7922 rc = hl_poll_timeout(
7923 hdev,
7924 mmSTLB_INV_PS,
7925 status,
7926 !status,
7927 1000,
7928 timeout_usec);
7929
7930 WREG32(mmSTLB_INV_SET, 0);
7931
7932 return rc;
7933 }
7934
7935 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7936 bool is_hard, u32 flags,
7937 u32 asid, u64 va, u64 size)
7938 {
7939 /* Treat as invalidate all because there is no range invalidation
7940 * in Gaudi
7941 */
7942 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7943 }
7944
7945 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7946 {
7947 u32 status, timeout_usec;
7948 int rc;
7949
7950 if (hdev->pldm)
7951 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7952 else
7953 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7954
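/*
 * Editorial sketch (assuming MMU_HOP0_PA43_12_SHIFT == 12 and
 * MMU_HOP0_PA49_44_SHIFT == 44, per the register names): the hop0 physical
 * address is split so that bits [43:12] are programmed via MMU_HOP0_PA43_12
 * and bits [49:44] via MMU_HOP0_PA49_44, after which MMU_BUSY is armed and
 * polled below until the configuration takes effect.
 */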
7955 WREG32(MMU_ASID, asid);
7956 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7957 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7958 WREG32(MMU_BUSY, 0x80000000);
7959
7960 rc = hl_poll_timeout(
7961 hdev,
7962 MMU_BUSY,
7963 status,
7964 !(status & 0x80000000),
7965 1000,
7966 timeout_usec);
7967
7968 if (rc) {
7969 dev_err(hdev->dev,
7970 "Timeout during MMU hop0 config of asid %d\n", asid);
7971 return rc;
7972 }
7973
7974 return 0;
7975 }
7976
7977 static int gaudi_send_heartbeat(struct hl_device *hdev)
7978 {
7979 struct gaudi_device *gaudi = hdev->asic_specific;
7980
7981 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7982 return 0;
7983
7984 return hl_fw_send_heartbeat(hdev);
7985 }
7986
7987 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7988 {
7989 struct gaudi_device *gaudi = hdev->asic_specific;
7990 struct asic_fixed_properties *prop = &hdev->asic_prop;
7991 int rc;
7992
7993 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7994 return 0;
7995
7996 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
7997 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
7998 mmCPU_BOOT_ERR1);
7999 if (rc)
8000 return rc;
8001
8002 if (!strlen(prop->cpucp_info.card_name))
8003 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8004 CARD_NAME_MAX_LEN);
8005
8006 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8007
8008 set_default_power_values(hdev);
8009
8010 return 0;
8011 }
8012
8013 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8014 struct engines_data *e)
8015 {
8016 struct gaudi_device *gaudi = hdev->asic_specific;
8017 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8018 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8019 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8020 unsigned long *mask = (unsigned long *)mask_arr;
8021 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8022 bool is_idle = true, is_eng_idle, is_slave;
8023 u64 offset;
8024 int i, dma_id, port;
8025
8026 if (e)
8027 hl_engine_data_sprintf(e,
8028 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8029 "--- ------- ------------ ---------- -------------\n");
8030
8031 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8032 dma_id = gaudi_dma_assignment[i];
8033 offset = dma_id * DMA_QMAN_OFFSET;
8034
8035 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8036 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8037 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8038 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8039 IS_DMA_IDLE(dma_core_sts0);
8040 is_idle &= is_eng_idle;
8041
8042 if (mask && !is_eng_idle)
8043 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8044 if (e)
8045 hl_engine_data_sprintf(e, fmt, dma_id,
8046 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8047 qm_cgm_sts, dma_core_sts0);
8048 }
8049
8050 if (e)
8051 hl_engine_data_sprintf(e,
8052 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8053 "--- ------- ------------ ---------- ----------\n");
8054
8055 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8056 offset = i * TPC_QMAN_OFFSET;
8057 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8058 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8059 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8060 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8061 IS_TPC_IDLE(tpc_cfg_sts);
8062 is_idle &= is_eng_idle;
8063
8064 if (mask && !is_eng_idle)
8065 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8066 if (e)
8067 hl_engine_data_sprintf(e, fmt, i,
8068 is_eng_idle ? "Y" : "N",
8069 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8070 }
8071
8072 if (e)
8073 hl_engine_data_sprintf(e,
8074 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8075 "--- ------- ------------ ---------- -----------\n");
8076
8077 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8078 offset = i * MME_QMAN_OFFSET;
8079 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8080 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8081
8082 /* MME 1 & 3 are slaves, no need to check their QMANs */
8083 is_slave = i % 2;
8084 if (!is_slave) {
8085 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8086 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8087 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8088 }
8089
8090 is_idle &= is_eng_idle;
8091
8092 if (mask && !is_eng_idle)
8093 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8094 if (e) {
8095 if (!is_slave)
8096 hl_engine_data_sprintf(e, fmt, i,
8097 is_eng_idle ? "Y" : "N",
8098 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8099 else
8100 hl_engine_data_sprintf(e, mme_slave_fmt, i,
8101 is_eng_idle ? "Y" : "N", "-",
8102 "-", mme_arch_sts);
8103 }
8104 }
8105
8106 if (e)
8107 hl_engine_data_sprintf(e,
8108 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8109 "--- ------- ------------ ----------\n");
8110
8111 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8112 offset = i * NIC_MACRO_QMAN_OFFSET;
8113 port = 2 * i;
8114 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8115 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8116 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8117 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8118 is_idle &= is_eng_idle;
8119
8120 if (mask && !is_eng_idle)
8121 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8122 if (e)
8123 hl_engine_data_sprintf(e, nic_fmt, port,
8124 is_eng_idle ? "Y" : "N",
8125 qm_glbl_sts0, qm_cgm_sts);
8126 }
8127
8128 port = 2 * i + 1;
8129 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8130 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8131 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8132 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8133 is_idle &= is_eng_idle;
8134
8135 if (mask && !is_eng_idle)
8136 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8137 if (e)
8138 hl_engine_data_sprintf(e, nic_fmt, port,
8139 is_eng_idle ? "Y" : "N",
8140 qm_glbl_sts0, qm_cgm_sts);
8141 }
8142 }
8143
8144 if (e)
8145 hl_engine_data_sprintf(e, "\n");
8146
8147 return is_idle;
8148 }
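/*
 * Illustrative caller sketch, not taken from this file: the busy mask is
 * filled with GAUDI_ENGINE_ID_* bit positions, so a caller passing a zeroed
 * array can test individual engines afterwards, e.g.:
 *
 *	u64 mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
 *
 *	if (!gaudi_is_device_idle(hdev, mask, ARRAY_SIZE(mask), NULL) &&
 *	    test_bit(GAUDI_ENGINE_ID_TPC_0, (unsigned long *)mask))
 *		dev_dbg(hdev->dev, "TPC0 is busy\n");
 *
 * HL_BUSY_ENGINES_MASK_EXT_SIZE is assumed here to be the array length used
 * by callers elsewhere in the driver; the exact constant name may differ.
 */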
8149
8150 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8151 __acquires(&gaudi->hw_queues_lock)
8152 {
8153 struct gaudi_device *gaudi = hdev->asic_specific;
8154
8155 spin_lock(&gaudi->hw_queues_lock);
8156 }
8157
8158 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8159 __releases(&gaudi->hw_queues_lock)
8160 {
8161 struct gaudi_device *gaudi = hdev->asic_specific;
8162
8163 spin_unlock(&gaudi->hw_queues_lock);
8164 }
8165
8166 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8167 {
8168 return hdev->pdev->device;
8169 }
8170
8171 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8172 size_t max_size)
8173 {
8174 struct gaudi_device *gaudi = hdev->asic_specific;
8175
8176 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8177 return 0;
8178
8179 return hl_fw_get_eeprom_data(hdev, data, max_size);
8180 }
8181
8182 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8183 {
8184 struct gaudi_device *gaudi = hdev->asic_specific;
8185
8186 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8187 return 0;
8188
8189 return hl_fw_get_monitor_dump(hdev, data);
8190 }
8191
8192 /*
8193 * this function should be used only during initialization and/or after reset,
8194 * when there are no active users.
8195 */
8196 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8197 {
8198 u64 kernel_timeout;
8199 u32 status, offset;
8200 int rc;
8201
8202 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8203
8204 if (hdev->pldm)
8205 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8206 else
8207 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8208
8209 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8210 lower_32_bits(tpc_kernel));
8211 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8212 upper_32_bits(tpc_kernel));
8213
8214 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8215 lower_32_bits(tpc_kernel));
8216 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8217 upper_32_bits(tpc_kernel));
8218 /* set a valid LUT pointer, content is of no significance */
8219 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8220 lower_32_bits(tpc_kernel));
8221 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8222 upper_32_bits(tpc_kernel));
8223
8224 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8225 lower_32_bits(CFG_BASE +
8226 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8227
8228 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8229 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8230 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8231 /* wait a bit for the engine to start executing */
8232 usleep_range(1000, 1500);
8233
8234 /* wait until engine has finished executing */
8235 rc = hl_poll_timeout(
8236 hdev,
8237 mmTPC0_CFG_STATUS + offset,
8238 status,
8239 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8240 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8241 1000,
8242 kernel_timeout);
8243
8244 if (rc) {
8245 dev_err(hdev->dev,
8246 "Timeout while waiting for TPC%d icache prefetch\n",
8247 tpc_id);
8248 return -EIO;
8249 }
8250
8251 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8252 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8253
8254 /* wait a bit for the engine to start executing */
8255 usleep_range(1000, 1500);
8256
8257 /* wait until engine has finished executing */
8258 rc = hl_poll_timeout(
8259 hdev,
8260 mmTPC0_CFG_STATUS + offset,
8261 status,
8262 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8263 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8264 1000,
8265 kernel_timeout);
8266
8267 if (rc) {
8268 dev_err(hdev->dev,
8269 "Timeout while waiting for TPC%d vector pipe\n",
8270 tpc_id);
8271 return -EIO;
8272 }
8273
8274 rc = hl_poll_timeout(
8275 hdev,
8276 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8277 status,
8278 (status == 0),
8279 1000,
8280 kernel_timeout);
8281
8282 if (rc) {
8283 dev_err(hdev->dev,
8284 "Timeout while waiting for TPC%d kernel to execute\n",
8285 tpc_id);
8286 return -EIO;
8287 }
8288
8289 return 0;
8290 }
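/*
 * Illustrative caller sketch (an assumption, not code from this file): per
 * the comment above gaudi_run_tpc_kernel(), the expected use is a one-shot
 * load of the TPC kernel on every engine during init or after reset, e.g.:
 *
 *	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
 *		rc = gaudi_run_tpc_kernel(hdev, tpc_kernel_dev_addr, tpc_id);
 *		if (rc)
 *			break;
 *	}
 *
 * where tpc_kernel_dev_addr is a hypothetical device address at which the
 * TPC kernel binary has already been placed.
 */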
8291
8292 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8293 struct hl_ctx *ctx)
8294 {
8295 struct gaudi_device *gaudi = hdev->asic_specific;
8296 int min_alloc_order, rc, collective_cb_size;
8297
8298 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8299 return 0;
8300
8301 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8302 HOST_SPACE_INTERNAL_CB_SZ,
8303 &hdev->internal_cb_pool_dma_addr,
8304 GFP_KERNEL | __GFP_ZERO);
8305
8306 if (!hdev->internal_cb_pool_virt_addr)
8307 return -ENOMEM;
8308
8309 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8310 sizeof(struct packet_fence);
8311 min_alloc_order = ilog2(collective_cb_size);
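/*
 * Hedged worked example, assuming packet_msg_short and packet_fence are
 * each two 32-bit words (as the value/ctl and cfg/ctl fields used in this
 * file suggest): collective_cb_size = 5 * 8 + 8 = 48 bytes, so
 * min_alloc_order = ilog2(48) = 5 and the pool below hands out memory in
 * 32-byte granules.
 */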
8312
8313 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8314 if (!hdev->internal_cb_pool) {
8315 dev_err(hdev->dev,
8316 "Failed to create internal CB pool\n");
8317 rc = -ENOMEM;
8318 goto free_internal_cb_pool;
8319 }
8320
8321 rc = gen_pool_add(hdev->internal_cb_pool,
8322 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8323 HOST_SPACE_INTERNAL_CB_SZ, -1);
8324 if (rc) {
8325 dev_err(hdev->dev,
8326 "Failed to add memory to internal CB pool\n");
8327 rc = -EFAULT;
8328 goto destroy_internal_cb_pool;
8329 }
8330
8331 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8332 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8333 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8334
8335 if (!hdev->internal_cb_va_base) {
8336 rc = -ENOMEM;
8337 goto destroy_internal_cb_pool;
8338 }
8339
8340 mutex_lock(&hdev->mmu_lock);
8341
8342 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8343 hdev->internal_cb_pool_dma_addr,
8344 HOST_SPACE_INTERNAL_CB_SZ);
8345 if (rc)
8346 goto unreserve_internal_cb_pool;
8347
8348 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8349 if (rc)
8350 goto unmap_internal_cb_pool;
8351
8352 mutex_unlock(&hdev->mmu_lock);
8353
8354 return 0;
8355
8356 unmap_internal_cb_pool:
8357 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8358 HOST_SPACE_INTERNAL_CB_SZ);
8359 unreserve_internal_cb_pool:
8360 mutex_unlock(&hdev->mmu_lock);
8361 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8362 HOST_SPACE_INTERNAL_CB_SZ);
8363 destroy_internal_cb_pool:
8364 gen_pool_destroy(hdev->internal_cb_pool);
8365 free_internal_cb_pool:
8366 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8367 hdev->internal_cb_pool_dma_addr);
8368
8369 return rc;
8370 }
8371
8372 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8373 struct hl_ctx *ctx)
8374 {
8375 struct gaudi_device *gaudi = hdev->asic_specific;
8376
8377 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8378 return;
8379
8380 mutex_lock(&hdev->mmu_lock);
8381 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8382 HOST_SPACE_INTERNAL_CB_SZ);
8383 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8384 HOST_SPACE_INTERNAL_CB_SZ);
8385 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8386 mutex_unlock(&hdev->mmu_lock);
8387
8388 gen_pool_destroy(hdev->internal_cb_pool);
8389
8390 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8391 hdev->internal_cb_pool_dma_addr);
8392 }
8393
8394 static int gaudi_ctx_init(struct hl_ctx *ctx)
8395 {
8396 int rc;
8397
8398 if (ctx->asid == HL_KERNEL_ASID_ID)
8399 return 0;
8400
8401 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8402 if (rc)
8403 return rc;
8404
8405 rc = gaudi_restore_user_registers(ctx->hdev);
8406 if (rc)
8407 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8408
8409 return rc;
8410 }
8411
8412 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8413 {
8414 if (ctx->asid == HL_KERNEL_ASID_ID)
8415 return;
8416
8417 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8418 }
8419
8420 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8421 {
8422 return 0;
8423 }
8424
8425 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8426 {
8427 return gaudi_cq_assignment[cq_idx];
8428 }
8429
8430 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8431 {
8432 return sizeof(struct packet_msg_short) +
8433 sizeof(struct packet_msg_prot) * 2;
8434 }
8435
8436 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8437 {
8438 return sizeof(struct packet_msg_short) * 4 +
8439 sizeof(struct packet_fence) +
8440 sizeof(struct packet_msg_prot) * 2;
8441 }
8442
8443 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8444 {
8445 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8446 }
8447
8448 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8449 u32 size, bool eb)
8450 {
8451 struct hl_cb *cb = (struct hl_cb *) data;
8452 struct packet_msg_short *pkt;
8453 u32 value, ctl, pkt_size = sizeof(*pkt);
8454
8455 pkt = cb->kernel_address + size;
8456 memset(pkt, 0, pkt_size);
8457
8458 /* Inc by 1, Mode ADD */
8459 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8460 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8461
8462 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8463 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8464 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8465 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8466 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8467 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8468 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8469
8470 pkt->value = cpu_to_le32(value);
8471 pkt->ctl = cpu_to_le32(ctl);
8472
8473 return size + pkt_size;
8474 }
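/*
 * Illustrative sketch only (not lifted from the driver): a signal CB is
 * expected to be sized via gaudi_get_signal_cb_size() and filled starting
 * at offset 0, e.g.:
 *
 *	u32 size = 0;
 *
 *	size = gaudi_gen_signal_cb(hdev, cb, sob_id, size, true);
 *
 * The return value is the running offset inside the CB; the two
 * packet_msg_prot entries accounted for in gaudi_get_signal_cb_size() are
 * presumably appended after it by the common CB-completion path (an
 * assumption, not shown in this file).
 */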
8475
8476 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8477 u16 addr)
8478 {
8479 u32 ctl, pkt_size = sizeof(*pkt);
8480
8481 memset(pkt, 0, pkt_size);
8482
8483 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8484 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8485 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8486 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8487 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8488 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8489
8490 pkt->value = cpu_to_le32(value);
8491 pkt->ctl = cpu_to_le32(ctl);
8492
8493 return pkt_size;
8494 }
8495
8496 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8497 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8498 u16 sob_val, u16 mon_id)
8499 {
8500 u64 monitor_base;
8501 u32 ctl, value, pkt_size = sizeof(*pkt);
8502 u16 msg_addr_offset;
8503 u8 mask;
8504
8505 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8506 dev_err(hdev->dev,
8507 "sob_base %u (mask %#x) is not valid\n",
8508 sob_base, sob_mask);
8509 return 0;
8510 }
8511
8512 /*
8513 * monitor_base should be the content of the base0 address registers,
8514 * so it will be added to the msg short offsets
8515 */
8516 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8517
8518 msg_addr_offset =
8519 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8520 monitor_base;
8521
8522 memset(pkt, 0, pkt_size);
8523
8524 /* Monitor config packet: bind the monitor to a sync object */
8525 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8526 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8527 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8528 0); /* GREATER OR EQUAL */
8529 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8530
8531 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8532 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8533 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8534 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8535 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8536 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8537 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8538
8539 pkt->value = cpu_to_le32(value);
8540 pkt->ctl = cpu_to_le32(ctl);
8541
8542 return pkt_size;
8543 }
8544
8545 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8546 {
8547 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8548
8549 memset(pkt, 0, pkt_size);
8550
8551 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8552 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8553 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8554
8555 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8556 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8557 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8558 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8559
8560 pkt->cfg = cpu_to_le32(cfg);
8561 pkt->ctl = cpu_to_le32(ctl);
8562
8563 return pkt_size;
8564 }
8565
8566 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8567 {
8568 u32 offset, nic_index;
8569
8570 switch (queue_id) {
8571 case GAUDI_QUEUE_ID_DMA_0_0:
8572 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8573 break;
8574 case GAUDI_QUEUE_ID_DMA_0_1:
8575 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8576 break;
8577 case GAUDI_QUEUE_ID_DMA_0_2:
8578 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8579 break;
8580 case GAUDI_QUEUE_ID_DMA_0_3:
8581 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8582 break;
8583 case GAUDI_QUEUE_ID_DMA_1_0:
8584 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8585 break;
8586 case GAUDI_QUEUE_ID_DMA_1_1:
8587 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8588 break;
8589 case GAUDI_QUEUE_ID_DMA_1_2:
8590 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8591 break;
8592 case GAUDI_QUEUE_ID_DMA_1_3:
8593 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8594 break;
8595 case GAUDI_QUEUE_ID_DMA_5_0:
8596 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8597 break;
8598 case GAUDI_QUEUE_ID_DMA_5_1:
8599 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8600 break;
8601 case GAUDI_QUEUE_ID_DMA_5_2:
8602 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8603 break;
8604 case GAUDI_QUEUE_ID_DMA_5_3:
8605 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8606 break;
8607 case GAUDI_QUEUE_ID_TPC_7_0:
8608 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8609 break;
8610 case GAUDI_QUEUE_ID_TPC_7_1:
8611 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8612 break;
8613 case GAUDI_QUEUE_ID_TPC_7_2:
8614 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8615 break;
8616 case GAUDI_QUEUE_ID_TPC_7_3:
8617 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8618 break;
8619 case GAUDI_QUEUE_ID_NIC_0_0:
8620 case GAUDI_QUEUE_ID_NIC_1_0:
8621 case GAUDI_QUEUE_ID_NIC_2_0:
8622 case GAUDI_QUEUE_ID_NIC_3_0:
8623 case GAUDI_QUEUE_ID_NIC_4_0:
8624 case GAUDI_QUEUE_ID_NIC_5_0:
8625 case GAUDI_QUEUE_ID_NIC_6_0:
8626 case GAUDI_QUEUE_ID_NIC_7_0:
8627 case GAUDI_QUEUE_ID_NIC_8_0:
8628 case GAUDI_QUEUE_ID_NIC_9_0:
8629 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8630 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8631 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8632 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8633 break;
8634 case GAUDI_QUEUE_ID_NIC_0_1:
8635 case GAUDI_QUEUE_ID_NIC_1_1:
8636 case GAUDI_QUEUE_ID_NIC_2_1:
8637 case GAUDI_QUEUE_ID_NIC_3_1:
8638 case GAUDI_QUEUE_ID_NIC_4_1:
8639 case GAUDI_QUEUE_ID_NIC_5_1:
8640 case GAUDI_QUEUE_ID_NIC_6_1:
8641 case GAUDI_QUEUE_ID_NIC_7_1:
8642 case GAUDI_QUEUE_ID_NIC_8_1:
8643 case GAUDI_QUEUE_ID_NIC_9_1:
8644 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8645 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8646 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8647 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8648 break;
8649 case GAUDI_QUEUE_ID_NIC_0_2:
8650 case GAUDI_QUEUE_ID_NIC_1_2:
8651 case GAUDI_QUEUE_ID_NIC_2_2:
8652 case GAUDI_QUEUE_ID_NIC_3_2:
8653 case GAUDI_QUEUE_ID_NIC_4_2:
8654 case GAUDI_QUEUE_ID_NIC_5_2:
8655 case GAUDI_QUEUE_ID_NIC_6_2:
8656 case GAUDI_QUEUE_ID_NIC_7_2:
8657 case GAUDI_QUEUE_ID_NIC_8_2:
8658 case GAUDI_QUEUE_ID_NIC_9_2:
8659 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8660 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8661 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8662 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8663 break;
8664 case GAUDI_QUEUE_ID_NIC_0_3:
8665 case GAUDI_QUEUE_ID_NIC_1_3:
8666 case GAUDI_QUEUE_ID_NIC_2_3:
8667 case GAUDI_QUEUE_ID_NIC_3_3:
8668 case GAUDI_QUEUE_ID_NIC_4_3:
8669 case GAUDI_QUEUE_ID_NIC_5_3:
8670 case GAUDI_QUEUE_ID_NIC_6_3:
8671 case GAUDI_QUEUE_ID_NIC_7_3:
8672 case GAUDI_QUEUE_ID_NIC_8_3:
8673 case GAUDI_QUEUE_ID_NIC_9_3:
8674 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8675 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8676 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8677 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8678 break;
8679 default:
8680 return -EINVAL;
8681 }
8682
8683 *addr = CFG_BASE + offset;
8684
8685 return 0;
8686 }
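/*
 * Hedged worked example of the NIC decoding above: for GAUDI_QUEUE_ID_NIC_3_0
 * (assuming the NIC queue IDs are laid out as four consecutive streams per
 * engine, as the ">> 2" implies), nic_index = 3, so the fence register is
 * taken from NIC macro 1 (nic_index >> 1), QM1 within that macro
 * (nic_index & 0x1):
 *
 *	offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
 *			1 * NIC_MACRO_QMAN_OFFSET + 1 * NIC_ENGINE_QMAN_OFFSET;
 */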
8687
8688 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8689 {
8690 u64 monitor_base;
8691 u32 size = 0;
8692 u16 msg_addr_offset;
8693
8694 /*
8695 * monitor_base should be the content of the base0 address registers,
8696 * so it will be added to the msg short offsets
8697 */
8698 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8699
8700 /* First monitor config packet: low address of the sync */
8701 msg_addr_offset =
8702 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8703 monitor_base;
8704
8705 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8706 msg_addr_offset);
8707
8708 /* Second monitor config packet: high address of the sync */
8709 msg_addr_offset =
8710 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8711 monitor_base;
8712
8713 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8714 msg_addr_offset);
8715
8716 /*
8717 * Third monitor config packet: the payload, i.e. what to write when the
8718 * sync triggers
8719 */
8720 msg_addr_offset =
8721 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8722 monitor_base;
8723
8724 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8725
8726 return size;
8727 }
8728
8729 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8730 struct hl_gen_wait_properties *prop)
8731 {
8732 struct hl_cb *cb = (struct hl_cb *) prop->data;
8733 void *buf = cb->kernel_address;
8734 u64 fence_addr = 0;
8735 u32 size = prop->size;
8736
8737 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8738 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8739 prop->q_idx);
8740 return 0;
8741 }
8742
8743 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8744 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8745 prop->sob_mask, prop->sob_val, prop->mon_id);
8746 size += gaudi_add_fence_pkt(buf + size);
8747
8748 return size;
8749 }
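/*
 * For reference, the wait CB built above matches gaudi_get_wait_cb_size():
 * three packet_msg_short entries for the monitor payload (address low,
 * address high, data), one packet_msg_short arming the monitor and one
 * packet_fence, i.e. 4 * msg_short + fence. The two packet_msg_prot entries
 * included in the size are presumably reserved for the completion packets
 * appended by the add_end_of_cb_packets path; that last part is an
 * assumption, not something shown in this function.
 */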
8750
8751 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8752 {
8753 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8754
8755 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8756 hw_sob->sob_id);
8757
8758 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8759 hw_sob->sob_id * 4, 0);
8760
8761 kref_init(&hw_sob->kref);
8762 }
8763
8764 static u64 gaudi_get_device_time(struct hl_device *hdev)
8765 {
8766 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8767
8768 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8769 }
8770
8771 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8772 u32 *block_size, u32 *block_id)
8773 {
8774 return -EPERM;
8775 }
8776
8777 static int gaudi_block_mmap(struct hl_device *hdev,
8778 struct vm_area_struct *vma,
8779 u32 block_id, u32 block_size)
8780 {
8781 return -EPERM;
8782 }
8783
8784 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8785 {
8786 struct cpu_dyn_regs *dyn_regs =
8787 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8788 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8789 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8790 le32_to_cpu(dyn_regs->gic_host_ints_irq);
8791
8792 WREG32(irq_handler_offset,
8793 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8794 }
8795
8796 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8797 {
8798 return -EINVAL;
8799 }
8800
8801 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8802 {
8803 switch (pll_idx) {
8804 case HL_GAUDI_CPU_PLL: return CPU_PLL;
8805 case HL_GAUDI_PCI_PLL: return PCI_PLL;
8806 case HL_GAUDI_NIC_PLL: return NIC_PLL;
8807 case HL_GAUDI_DMA_PLL: return DMA_PLL;
8808 case HL_GAUDI_MESH_PLL: return MESH_PLL;
8809 case HL_GAUDI_MME_PLL: return MME_PLL;
8810 case HL_GAUDI_TPC_PLL: return TPC_PLL;
8811 case HL_GAUDI_IF_PLL: return IF_PLL;
8812 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8813 case HL_GAUDI_HBM_PLL: return HBM_PLL;
8814 default: return -EINVAL;
8815 }
8816 }
8817
8818 static int gaudi_add_sync_to_engine_map_entry(
8819 struct hl_sync_to_engine_map *map, u32 reg_value,
8820 enum hl_sync_engine_type engine_type, u32 engine_id)
8821 {
8822 struct hl_sync_to_engine_map_entry *entry;
8823
8824 /* Reg value represents a partial address of the sync object;
8825 * it is used as a unique identifier. For this we need to
8826 * strip the cfg base bits from the value.
8827 */
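/*
 * Hedged example: if the register holds
 * CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 4 * n, subtracting
 * lower_32_bits(CFG_BASE) below leaves only the sync manager offset, which
 * is stable and therefore usable as the hash key. The exact register layout
 * here is an assumption for illustration.
 */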
8828 if (reg_value == 0 || reg_value == 0xffffffff)
8829 return 0;
8830 reg_value -= lower_32_bits(CFG_BASE);
8831
8832 /* create a new hash entry */
8833 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8834 if (!entry)
8835 return -ENOMEM;
8836 entry->engine_type = engine_type;
8837 entry->engine_id = engine_id;
8838 entry->sync_id = reg_value;
8839 hash_add(map->tb, &entry->node, reg_value);
8840
8841 return 0;
8842 }
8843
8844 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8845 struct hl_sync_to_engine_map *map)
8846 {
8847 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8848 int i, j, rc;
8849 u32 reg_value;
8850
8851 /* Iterate over TPC engines */
8852 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8853
8854 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8855 sds->props[SP_NEXT_TPC] * i);
8856
8857 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8858 ENGINE_TPC, i);
8859 if (rc)
8860 goto free_sync_to_engine_map;
8861 }
8862
8863 /* Iterate over MME engines */
8864 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8865 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8866
8867 reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8868 sds->props[SP_NEXT_MME] * i +
8869 j * sizeof(u32));
8870
8871 rc = gaudi_add_sync_to_engine_map_entry(
8872 map, reg_value, ENGINE_MME,
8873 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8874 if (rc)
8875 goto free_sync_to_engine_map;
8876 }
8877 }
8878
8879 /* Iterate over DMA engines */
8880 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8881 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8882 sds->props[SP_DMA_QUEUES_OFFSET] * i);
8883 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8884 ENGINE_DMA, i);
8885 if (rc)
8886 goto free_sync_to_engine_map;
8887 }
8888
8889 return 0;
8890
8891 free_sync_to_engine_map:
8892 hl_state_dump_free_sync_to_engine_map(map);
8893
8894 return rc;
8895 }
8896
8897 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8898 {
8899 return FIELD_GET(
8900 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8901 mon->status);
8902 }
8903
8904 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8905 {
8906 const size_t max_write = 10;
8907 u32 gid, mask, sob;
8908 int i, offset;
8909
8910 /* Sync object ID is calculated as follows:
8911 * (8 * group_id + cleared bits in mask)
8912 */
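/*
 * Hedged worked example: with group id 2, an arm mask of 0b11111100
 * (bits 0 and 1 cleared) and MONITOR_MAX_SOBS taken as 8 per the formula
 * above, the monitored sync objects are 2 * 8 + 0 = 16 and 2 * 8 + 1 = 17,
 * so this function emits "16, 17".
 */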
8913 gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8914 mon->arm_data);
8915 mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8916 mon->arm_data);
8917
8918 for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8919 max_write; mask >>= 1, i++) {
8920 if (!(mask & 1)) {
8921 sob = gid * MONITOR_MAX_SOBS + i;
8922
8923 if (offset > 0)
8924 offset += snprintf(sobs + offset, max_write,
8925 ", ");
8926
8927 offset += snprintf(sobs + offset, max_write, "%u", sob);
8928 }
8929 }
8930 }
8931
8932 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8933 struct hl_device *hdev,
8934 struct hl_mon_state_dump *mon)
8935 {
8936 const char *name;
8937 char scratch_buf1[BIN_REG_STRING_SIZE],
8938 scratch_buf2[BIN_REG_STRING_SIZE];
8939 char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8940
8941 name = hl_state_dump_get_monitor_name(hdev, mon);
8942 if (!name)
8943 name = "";
8944
8945 gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8946
8947 return hl_snprintf_resize(
8948 buf, size, offset,
8949 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8950 mon->id, name,
8951 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8952 mon->arm_data),
8953 hl_format_as_binary(
8954 scratch_buf1, sizeof(scratch_buf1),
8955 FIELD_GET(
8956 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8957 mon->arm_data)),
8958 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8959 mon->arm_data),
8960 mon->wr_data,
8961 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8962 hl_format_as_binary(
8963 scratch_buf2, sizeof(scratch_buf2),
8964 FIELD_GET(
8965 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8966 mon->status)),
8967 monitored_sobs);
8968 }
8969
8970
8971 static int gaudi_print_fences_single_engine(
8972 struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8973 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8974 size_t *size, size_t *offset)
8975 {
8976 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8977 int rc = -ENOMEM, i;
8978 u32 *statuses, *fences;
8979
8980 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8981 sizeof(*statuses), GFP_KERNEL);
8982 if (!statuses)
8983 goto out;
8984
8985 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8986 sds->props[SP_ENGINE_NUM_OF_QUEUES],
8987 sizeof(*fences), GFP_KERNEL);
8988 if (!fences)
8989 goto free_status;
8990
8991 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
8992 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
8993
8994 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
8995 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
8996 fences[i] = RREG32(base_offset + i * sizeof(u32));
8997
8998 /* The actual print */
8999 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9000 u32 fence_id;
9001 u64 fence_cnt, fence_rdata;
9002 const char *engine_name;
9003
9004 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9005 statuses[i]))
9006 continue;
9007
9008 fence_id =
9009 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9010 fence_cnt = base_offset + CFG_BASE +
9011 sizeof(u32) *
9012 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9013 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9014 sds->props[SP_FENCE0_RDATA_OFFSET];
9015 engine_name = hl_sync_engine_to_string(engine_type);
9016
9017 rc = hl_snprintf_resize(
9018 buf, size, offset,
9019 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9020 engine_name, engine_id,
9021 i, fence_id,
9022 fence_cnt, engine_name, engine_id, fence_id, i,
9023 fence_rdata, engine_name, engine_id, fence_id, i,
9024 fences[fence_id],
9025 statuses[i]);
9026 if (rc)
9027 goto free_fences;
9028 }
9029
9030 rc = 0;
9031
9032 free_fences:
9033 kfree(fences);
9034 free_status:
9035 kfree(statuses);
9036 out:
9037 return rc;
9038 }
9039
9040
9041 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9042 .monitor_valid = gaudi_monitor_valid,
9043 .print_single_monitor = gaudi_print_single_monitor,
9044 .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9045 .print_fences_single_engine = gaudi_print_fences_single_engine,
9046 };
9047
9048 static void gaudi_state_dump_init(struct hl_device *hdev)
9049 {
9050 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9051 int i;
9052
9053 for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9054 hash_add(sds->so_id_to_str_tb,
9055 &gaudi_so_id_to_str[i].node,
9056 gaudi_so_id_to_str[i].id);
9057
9058 for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9059 hash_add(sds->monitor_id_to_str_tb,
9060 &gaudi_monitor_id_to_str[i].node,
9061 gaudi_monitor_id_to_str[i].id);
9062
9063 sds->props = gaudi_state_dump_specs_props;
9064
9065 sds->sync_namager_names = gaudi_sync_manager_names;
9066
9067 sds->funcs = gaudi_state_dump_funcs;
9068 }
9069
9070 static u32 *gaudi_get_stream_master_qid_arr(void)
9071 {
9072 return gaudi_stream_master;
9073 }
9074
9075 static int gaudi_set_dram_properties(struct hl_device *hdev)
9076 {
9077 return 0;
9078 }
9079
9080 static int gaudi_set_binning_masks(struct hl_device *hdev)
9081 {
9082 return 0;
9083 }
9084
9085 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9086 {
9087 }
9088
9089 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9090 {
9091 struct hl_device *hdev = dev_get_drvdata(dev);
9092 struct cpucp_info *cpucp_info;
9093
9094 cpucp_info = &hdev->asic_prop.cpucp_info;
9095
9096 return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9097 }
9098
9099 static DEVICE_ATTR_RO(infineon_ver);
9100
9101 static struct attribute *gaudi_vrm_dev_attrs[] = {
9102 &dev_attr_infineon_ver.attr,
9103 NULL,
9104 };
9105
9106 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9107 struct attribute_group *dev_vrm_attr_grp)
9108 {
9109 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9110 dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9111 }
9112
9113 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9114 {
9115 return 0;
9116 }
9117
9118 static const struct hl_asic_funcs gaudi_funcs = {
9119 .early_init = gaudi_early_init,
9120 .early_fini = gaudi_early_fini,
9121 .late_init = gaudi_late_init,
9122 .late_fini = gaudi_late_fini,
9123 .sw_init = gaudi_sw_init,
9124 .sw_fini = gaudi_sw_fini,
9125 .hw_init = gaudi_hw_init,
9126 .hw_fini = gaudi_hw_fini,
9127 .halt_engines = gaudi_halt_engines,
9128 .suspend = gaudi_suspend,
9129 .resume = gaudi_resume,
9130 .mmap = gaudi_mmap,
9131 .ring_doorbell = gaudi_ring_doorbell,
9132 .pqe_write = gaudi_pqe_write,
9133 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9134 .asic_dma_free_coherent = gaudi_dma_free_coherent,
9135 .scrub_device_mem = gaudi_scrub_device_mem,
9136 .scrub_device_dram = gaudi_scrub_device_dram,
9137 .get_int_queue_base = gaudi_get_int_queue_base,
9138 .test_queues = gaudi_test_queues,
9139 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9140 .asic_dma_pool_free = gaudi_dma_pool_free,
9141 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9142 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9143 .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9144 .cs_parser = gaudi_cs_parser,
9145 .asic_dma_map_sgtable = hl_dma_map_sgtable,
9146 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9147 .update_eq_ci = gaudi_update_eq_ci,
9148 .context_switch = gaudi_context_switch,
9149 .restore_phase_topology = gaudi_restore_phase_topology,
9150 .debugfs_read_dma = gaudi_debugfs_read_dma,
9151 .add_device_attr = gaudi_add_device_attr,
9152 .handle_eqe = gaudi_handle_eqe,
9153 .get_events_stat = gaudi_get_events_stat,
9154 .read_pte = gaudi_read_pte,
9155 .write_pte = gaudi_write_pte,
9156 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9157 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9158 .mmu_prefetch_cache_range = NULL,
9159 .send_heartbeat = gaudi_send_heartbeat,
9160 .debug_coresight = gaudi_debug_coresight,
9161 .is_device_idle = gaudi_is_device_idle,
9162 .compute_reset_late_init = gaudi_compute_reset_late_init,
9163 .hw_queues_lock = gaudi_hw_queues_lock,
9164 .hw_queues_unlock = gaudi_hw_queues_unlock,
9165 .get_pci_id = gaudi_get_pci_id,
9166 .get_eeprom_data = gaudi_get_eeprom_data,
9167 .get_monitor_dump = gaudi_get_monitor_dump,
9168 .send_cpu_message = gaudi_send_cpu_message,
9169 .pci_bars_map = gaudi_pci_bars_map,
9170 .init_iatu = gaudi_init_iatu,
9171 .rreg = hl_rreg,
9172 .wreg = hl_wreg,
9173 .halt_coresight = gaudi_halt_coresight,
9174 .ctx_init = gaudi_ctx_init,
9175 .ctx_fini = gaudi_ctx_fini,
9176 .pre_schedule_cs = gaudi_pre_schedule_cs,
9177 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9178 .load_firmware_to_device = gaudi_load_firmware_to_device,
9179 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9180 .get_signal_cb_size = gaudi_get_signal_cb_size,
9181 .get_wait_cb_size = gaudi_get_wait_cb_size,
9182 .gen_signal_cb = gaudi_gen_signal_cb,
9183 .gen_wait_cb = gaudi_gen_wait_cb,
9184 .reset_sob = gaudi_reset_sob,
9185 .reset_sob_group = gaudi_reset_sob_group,
9186 .get_device_time = gaudi_get_device_time,
9187 .pb_print_security_errors = NULL,
9188 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9189 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9190 .get_dec_base_addr = NULL,
9191 .scramble_addr = hl_mmu_scramble_addr,
9192 .descramble_addr = hl_mmu_descramble_addr,
9193 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9194 .get_hw_block_id = gaudi_get_hw_block_id,
9195 .hw_block_mmap = gaudi_block_mmap,
9196 .enable_events_from_fw = gaudi_enable_events_from_fw,
9197 .ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9198 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9199 .init_firmware_preload_params = gaudi_init_firmware_preload_params,
9200 .init_firmware_loader = gaudi_init_firmware_loader,
9201 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9202 .state_dump_init = gaudi_state_dump_init,
9203 .get_sob_addr = gaudi_get_sob_addr,
9204 .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9205 .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9206 .check_if_razwi_happened = gaudi_check_if_razwi_happened,
9207 .mmu_get_real_page_size = hl_mmu_get_real_page_size,
9208 .access_dev_mem = hl_access_dev_mem,
9209 .set_dram_bar_base = gaudi_set_hbm_bar_base,
9210 .send_device_activity = gaudi_send_device_activity,
9211 .set_dram_properties = gaudi_set_dram_properties,
9212 .set_binning_masks = gaudi_set_binning_masks,
9213 };
9214
9215 /**
9216 * gaudi_set_asic_funcs - set GAUDI function pointers
9217 *
9218 * @hdev: pointer to hl_device structure
9219 *
9220 */
9221 void gaudi_set_asic_funcs(struct hl_device *hdev)
9222 {
9223 hdev->asic_funcs = &gaudi_funcs;
9224 }
9225