// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be
 *                      allowed because of TDMA (tensor DMA). Hence, WREG is
 *                      never secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9

#define GAUDI_MAX_STRING_LEN		20

#define GAUDI_CB_POOL_CB_CNT		512
#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
	gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
	gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
219 "FENCE 2 dec under min value and clipped", 220 "FENCE 3 dec under min value and clipped" 221 }; 222 223 static const char * const 224 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { 225 "Choice push while full error", 226 "Choice Q watchdog error", 227 "MSG AXI LBW returned with error" 228 }; 229 230 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { 231 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ 232 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ 233 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */ 234 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */ 235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */ 236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */ 237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */ 238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */ 239 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */ 240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */ 241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */ 242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */ 243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */ 244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */ 245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */ 246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */ 247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */ 248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */ 249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */ 250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */ 251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */ 252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */ 253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */ 254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */ 255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */ 256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */ 257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */ 258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */ 259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */ 260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */ 261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */ 262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */ 263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */ 264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ 265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ 266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ 267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ 268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */ 269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ 270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ 271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ 272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ 273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ 274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ 275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ 276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ 277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ 278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ 279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ 280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ 281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ 282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */ 283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ 284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ 285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ 286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ 287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ 288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ 289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ 290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ 291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ 292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */ 293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */ 294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */ 295 QUEUE_TYPE_INT, /* 
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
"SYNC_OBJ_ENGINE_SEM_DMA_2" }, 367 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" }, 368 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" }, 369 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" }, 370 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" }, 371 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" }, 372 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" }, 373 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" }, 374 }; 375 376 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = { 377 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" }, 378 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" }, 379 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" }, 380 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" }, 381 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" }, 382 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" }, 383 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" }, 384 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" }, 385 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" }, 386 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" }, 387 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" }, 388 }; 389 390 static s64 gaudi_state_dump_specs_props[] = { 391 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, 392 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL, 393 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK, 394 [SP_MON_OBJ_WR_ADDR_LOW] = 395 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0, 396 [SP_MON_OBJ_WR_ADDR_HIGH] = 397 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0, 398 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0, 399 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0, 400 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0, 401 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK, 402 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0, 403 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR, 404 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0, 405 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0, 406 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL, 407 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0, 408 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0, 409 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO, 410 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0, 411 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES, 412 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES, 413 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES, 414 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES, 415 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES, 416 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS, 417 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES, 418 [SP_FENCE0_CNT_OFFSET] = 419 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0, 420 [SP_FENCE0_RDATA_OFFSET] = 421 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0, 422 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0, 423 [SP_NUM_CORES] = 1, 424 }; 425 426 static const int gaudi_queue_id_to_engine_id[] = { 427 [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0, 428 [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1, 429 [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE, 430 [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2, 431 [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3, 432 [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4, 433 [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5, 434 [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6, 435 
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
				u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
				struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
				u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
				u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

		if (prop->fw_security_enabled)
			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
		else
			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
			get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
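	/* The DMMU covers the upper half of the host VA space with 2MB pages,
	 * while the PMMU keeps the lower half with 4KB pages (set up above).
	 */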
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	/* single msi */
	prop->eq_interrupt_id = 0;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if ((hdev->fw_components & FW_TYPE_LINUX) &&
			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be
	 * aligned to 8KB
	 */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
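	/* The master queue arms two monitors over the SOB group: the first
	 * covers the SOBs of NICs 0-7 and the second covers NICs 8-9 plus the
	 * reduction engine (DMA5/TPC7), so two wait CBs are generated below.
	 */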
	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before start going over the jobs
		 * of the master/slaves, the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job depending on each
		 * job's wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevents the hw_sob refcount value,
	 * which is changed by the signal/wait flows, from going out of sync.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
						gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 cacheline_end, additional_commands;

	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
	additional_commands = sizeof(struct packet_msg_prot) * 2;

	if (user_cb_size + additional_commands > cacheline_end)
		return cacheline_end - user_cb_size + additional_commands;
	else
		return additional_commands;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since it's guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues, which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
The device CPU works with 40-bits addresses, while bit 39 must be set 1701 * to '1' when accessing the host. 1702 * Bits 49:39 of the full host address are saved for a later 1703 * configuration of the HW to perform extension to 50 bits. 1704 * Because there is a single HW register that holds the extension bits, 1705 * these bits must be identical in all allocated range. 1706 */ 1707 1708 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 1709 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 1710 &dma_addr_arr[i], 1711 GFP_KERNEL | __GFP_ZERO); 1712 if (!virt_addr_arr[i]) { 1713 rc = -ENOMEM; 1714 goto free_dma_mem_arr; 1715 } 1716 1717 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 1718 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == 1719 GAUDI_CPU_PCI_MSB_ADDR(end_addr)) 1720 break; 1721 } 1722 1723 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { 1724 dev_err(hdev->dev, 1725 "MSB of CPU accessible DMA memory are not identical in all range\n"); 1726 rc = -EFAULT; 1727 goto free_dma_mem_arr; 1728 } 1729 1730 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 1731 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 1732 hdev->cpu_pci_msb_addr = 1733 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); 1734 1735 if (!hdev->asic_prop.fw_security_enabled) 1736 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); 1737 1738 free_dma_mem_arr: 1739 for (j = 0 ; j < i ; j++) 1740 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 1741 dma_addr_arr[j]); 1742 1743 return rc; 1744 } 1745 1746 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev) 1747 { 1748 struct gaudi_device *gaudi = hdev->asic_specific; 1749 struct gaudi_internal_qman_info *q; 1750 u32 i; 1751 1752 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1753 q = &gaudi->internal_qmans[i]; 1754 if (!q->pq_kernel_addr) 1755 continue; 1756 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr); 1757 } 1758 } 1759 1760 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev) 1761 { 1762 struct gaudi_device *gaudi = hdev->asic_specific; 1763 struct gaudi_internal_qman_info *q; 1764 int rc, i; 1765 1766 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1767 if (gaudi_queue_type[i] != QUEUE_TYPE_INT) 1768 continue; 1769 1770 q = &gaudi->internal_qmans[i]; 1771 1772 switch (i) { 1773 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3: 1774 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES; 1775 break; 1776 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3: 1777 q->pq_size = MME_QMAN_SIZE_IN_BYTES; 1778 break; 1779 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3: 1780 q->pq_size = TPC_QMAN_SIZE_IN_BYTES; 1781 break; 1782 case GAUDI_QUEUE_ID_NIC_0_0 ... 
GAUDI_QUEUE_ID_NIC_9_3: 1783 q->pq_size = NIC_QMAN_SIZE_IN_BYTES; 1784 break; 1785 default: 1786 dev_err(hdev->dev, "Bad internal queue index %d", i); 1787 rc = -EINVAL; 1788 goto free_internal_qmans_pq_mem; 1789 } 1790 1791 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr, 1792 GFP_KERNEL | __GFP_ZERO); 1793 if (!q->pq_kernel_addr) { 1794 rc = -ENOMEM; 1795 goto free_internal_qmans_pq_mem; 1796 } 1797 } 1798 1799 return 0; 1800 1801 free_internal_qmans_pq_mem: 1802 gaudi_free_internal_qmans_pq_mem(hdev); 1803 return rc; 1804 } 1805 1806 static void gaudi_set_pci_memory_regions(struct hl_device *hdev) 1807 { 1808 struct asic_fixed_properties *prop = &hdev->asic_prop; 1809 struct pci_mem_region *region; 1810 1811 /* CFG */ 1812 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 1813 region->region_base = CFG_BASE; 1814 region->region_size = CFG_SIZE; 1815 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR; 1816 region->bar_size = CFG_BAR_SIZE; 1817 region->bar_id = CFG_BAR_ID; 1818 region->used = 1; 1819 1820 /* SRAM */ 1821 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 1822 region->region_base = SRAM_BASE_ADDR; 1823 region->region_size = SRAM_SIZE; 1824 region->offset_in_bar = 0; 1825 region->bar_size = SRAM_BAR_SIZE; 1826 region->bar_id = SRAM_BAR_ID; 1827 region->used = 1; 1828 1829 /* DRAM */ 1830 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 1831 region->region_base = DRAM_PHYS_BASE; 1832 region->region_size = hdev->asic_prop.dram_size; 1833 region->offset_in_bar = 0; 1834 region->bar_size = prop->dram_pci_bar_size; 1835 region->bar_id = HBM_BAR_ID; 1836 region->used = 1; 1837 1838 /* SP SRAM */ 1839 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM]; 1840 region->region_base = PSOC_SCRATCHPAD_ADDR; 1841 region->region_size = PSOC_SCRATCHPAD_SIZE; 1842 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR; 1843 region->bar_size = CFG_BAR_SIZE; 1844 region->bar_id = CFG_BAR_ID; 1845 region->used = 1; 1846 } 1847 1848 static int gaudi_sw_init(struct hl_device *hdev) 1849 { 1850 struct gaudi_device *gaudi; 1851 u32 i, event_id = 0; 1852 int rc; 1853 1854 /* Allocate device structure */ 1855 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 1856 if (!gaudi) 1857 return -ENOMEM; 1858 1859 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 1860 if (gaudi_irq_map_table[i].valid) { 1861 if (event_id == GAUDI_EVENT_SIZE) { 1862 dev_err(hdev->dev, 1863 "Event array exceeds the limit of %u events\n", 1864 GAUDI_EVENT_SIZE); 1865 rc = -EINVAL; 1866 goto free_gaudi_device; 1867 } 1868 1869 gaudi->events[event_id++] = 1870 gaudi_irq_map_table[i].fc_id; 1871 } 1872 } 1873 1874 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1875 1876 hdev->asic_specific = gaudi; 1877 1878 /* Create DMA pool for small allocations */ 1879 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1880 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1881 if (!hdev->dma_pool) { 1882 dev_err(hdev->dev, "failed to create DMA pool\n"); 1883 rc = -ENOMEM; 1884 goto free_gaudi_device; 1885 } 1886 1887 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1888 if (rc) 1889 goto free_dma_pool; 1890 1891 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1892 if (!hdev->cpu_accessible_dma_pool) { 1893 dev_err(hdev->dev, 1894 "Failed to create CPU accessible DMA pool\n"); 1895 rc = -ENOMEM; 1896 goto free_cpu_dma_mem; 1897 } 1898 1899 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1900 (uintptr_t) hdev->cpu_accessible_dma_mem, 1901 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1902 if 
(rc) { 1903 dev_err(hdev->dev, 1904 "Failed to add memory to CPU accessible DMA pool\n"); 1905 rc = -EFAULT; 1906 goto free_cpu_accessible_dma_pool; 1907 } 1908 1909 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1910 if (rc) 1911 goto free_cpu_accessible_dma_pool; 1912 1913 spin_lock_init(&gaudi->hw_queues_lock); 1914 1915 hdev->supports_sync_stream = true; 1916 hdev->supports_coresight = true; 1917 hdev->supports_staged_submission = true; 1918 hdev->supports_wait_for_multi_cs = true; 1919 1920 hdev->asic_funcs->set_pci_memory_regions(hdev); 1921 hdev->stream_master_qid_arr = 1922 hdev->asic_funcs->get_stream_master_qid_arr(); 1923 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; 1924 1925 return 0; 1926 1927 free_cpu_accessible_dma_pool: 1928 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1929 free_cpu_dma_mem: 1930 if (!hdev->asic_prop.fw_security_enabled) 1931 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1932 hdev->cpu_pci_msb_addr); 1933 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1934 hdev->cpu_accessible_dma_address); 1935 free_dma_pool: 1936 dma_pool_destroy(hdev->dma_pool); 1937 free_gaudi_device: 1938 kfree(gaudi); 1939 return rc; 1940 } 1941 1942 static int gaudi_sw_fini(struct hl_device *hdev) 1943 { 1944 struct gaudi_device *gaudi = hdev->asic_specific; 1945 1946 gaudi_free_internal_qmans_pq_mem(hdev); 1947 1948 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1949 1950 if (!hdev->asic_prop.fw_security_enabled) 1951 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1952 hdev->cpu_pci_msb_addr); 1953 1954 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1955 hdev->cpu_accessible_dma_address); 1956 1957 dma_pool_destroy(hdev->dma_pool); 1958 1959 kfree(gaudi); 1960 1961 return 0; 1962 } 1963 1964 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1965 { 1966 struct hl_device *hdev = arg; 1967 int i; 1968 1969 if (hdev->disabled) 1970 return IRQ_HANDLED; 1971 1972 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1973 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1974 1975 hl_irq_handler_eq(irq, &hdev->event_queue); 1976 1977 return IRQ_HANDLED; 1978 } 1979 1980 /* 1981 * For backward compatibility, new MSI interrupts should be set after the 1982 * existing CPU and NIC interrupts. 1983 */ 1984 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1985 bool cpu_eq) 1986 { 1987 int msi_vec; 1988 1989 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1990 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1991 GAUDI_EVENT_QUEUE_MSI_IDX); 1992 1993 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? 
nr : 1994 (nr + NIC_NUMBER_OF_ENGINES + 1); 1995 1996 return pci_irq_vector(hdev->pdev, msi_vec); 1997 } 1998 1999 static int gaudi_enable_msi_single(struct hl_device *hdev) 2000 { 2001 int rc, irq; 2002 2003 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n"); 2004 2005 irq = gaudi_pci_irq_vector(hdev, 0, false); 2006 rc = request_irq(irq, gaudi_irq_handler_single, 0, 2007 "gaudi single msi", hdev); 2008 if (rc) 2009 dev_err(hdev->dev, 2010 "Failed to request single MSI IRQ\n"); 2011 2012 return rc; 2013 } 2014 2015 static int gaudi_enable_msi(struct hl_device *hdev) 2016 { 2017 struct gaudi_device *gaudi = hdev->asic_specific; 2018 int rc; 2019 2020 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 2021 return 0; 2022 2023 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); 2024 if (rc < 0) { 2025 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 2026 return rc; 2027 } 2028 2029 rc = gaudi_enable_msi_single(hdev); 2030 if (rc) 2031 goto free_pci_irq_vectors; 2032 2033 gaudi->hw_cap_initialized |= HW_CAP_MSI; 2034 2035 return 0; 2036 2037 free_pci_irq_vectors: 2038 pci_free_irq_vectors(hdev->pdev); 2039 return rc; 2040 } 2041 2042 static void gaudi_sync_irqs(struct hl_device *hdev) 2043 { 2044 struct gaudi_device *gaudi = hdev->asic_specific; 2045 2046 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2047 return; 2048 2049 /* Wait for all pending IRQs to be finished */ 2050 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 2051 } 2052 2053 static void gaudi_disable_msi(struct hl_device *hdev) 2054 { 2055 struct gaudi_device *gaudi = hdev->asic_specific; 2056 2057 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2058 return; 2059 2060 gaudi_sync_irqs(hdev); 2061 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 2062 pci_free_irq_vectors(hdev->pdev); 2063 2064 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 2065 } 2066 2067 static void gaudi_init_scrambler_sram(struct hl_device *hdev) 2068 { 2069 struct gaudi_device *gaudi = hdev->asic_specific; 2070 2071 if (hdev->asic_prop.fw_security_enabled) 2072 return; 2073 2074 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 2075 CPU_BOOT_DEV_STS0_SRAM_SCR_EN) 2076 return; 2077 2078 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 2079 return; 2080 2081 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2082 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2083 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2084 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2085 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2086 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2087 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2088 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2089 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2090 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2091 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2092 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2093 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2094 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2095 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2096 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2097 2098 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2099 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2100 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2101 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2102 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2103 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2104 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2105 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2106 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2107 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2108 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2109 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2110 
WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2111 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2112 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2113 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2114 2115 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 2116 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2117 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 2118 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2119 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 2120 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2121 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 2122 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2123 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 2124 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2125 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 2126 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2127 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 2128 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2129 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 2130 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2131 2132 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 2133 } 2134 2135 static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 2136 { 2137 struct gaudi_device *gaudi = hdev->asic_specific; 2138 2139 if (hdev->asic_prop.fw_security_enabled) 2140 return; 2141 2142 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2143 CPU_BOOT_DEV_STS0_DRAM_SCR_EN) 2144 return; 2145 2146 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 2147 return; 2148 2149 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 2150 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2151 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 2152 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2153 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 2154 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2155 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 2156 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2157 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 2158 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2159 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 2160 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2161 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 2162 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2163 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 2164 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2165 2166 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 2167 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2168 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 2169 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2170 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 2171 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2172 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 2173 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2174 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 2175 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2176 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 2177 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2178 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 2179 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2180 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 2181 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2182 2183 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 2184 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2185 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 2186 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2187 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 2188 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2189 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 2190 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2191 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 2192 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2193 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 2194 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2195 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 2196 1 << 
DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2197 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 2198 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2199 2200 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 2201 } 2202 2203 static void gaudi_init_e2e(struct hl_device *hdev) 2204 { 2205 if (hdev->asic_prop.fw_security_enabled) 2206 return; 2207 2208 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2209 CPU_BOOT_DEV_STS0_E2E_CRED_EN) 2210 return; 2211 2212 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 2213 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 2214 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 2215 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 2216 2217 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2218 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2219 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2220 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2221 2222 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2223 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2224 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2225 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2226 2227 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2228 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2229 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2230 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2231 2232 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2233 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2234 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2235 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2236 2237 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2238 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2239 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2240 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2241 2242 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2243 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2244 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2245 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2246 2247 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 2248 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 2249 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 2250 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 2251 2252 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 2253 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 2254 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 2255 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 2256 2257 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2258 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2259 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2260 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2261 2262 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2263 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2264 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2265 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2266 2267 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2268 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2269 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2270 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2271 2272 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2273 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2274 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2275 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2276 2277 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2278 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2279 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2280 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2281 2282 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2283 
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2284 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2285 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2286 2287 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 2288 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 2289 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 2290 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 2291 2292 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2293 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2294 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2295 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2296 2297 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2298 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2299 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2300 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2301 2302 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2303 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2304 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2305 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2306 2307 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2308 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2309 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2310 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2311 2312 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2313 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2314 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2315 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2316 2317 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2318 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2319 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2320 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2321 2322 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2323 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2324 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2325 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2326 2327 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2328 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2329 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2330 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2331 2332 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 2333 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2334 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 2335 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2336 2337 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 2338 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2339 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 2340 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2341 2342 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 2343 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2344 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 2345 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2346 2347 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 2348 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2349 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 2350 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2351 2352 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 2353 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2354 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 2355 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2356 2357 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 2358 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2359 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 2360 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2361 2362 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 2363 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2364 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 2365 1 << 
IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2366 2367 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 2368 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2369 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 2370 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2371 2372 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 2373 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2374 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 2375 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2376 2377 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 2378 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2379 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 2380 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2381 2382 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 2383 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2384 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 2385 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2386 2387 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 2388 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2389 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 2390 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2391 2392 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 2393 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2394 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 2395 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2396 2397 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 2398 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2399 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 2400 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2401 2402 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 2403 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2404 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 2405 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2406 2407 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 2408 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2409 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 2410 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2411 2412 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 2413 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2414 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 2415 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2416 2417 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 2418 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2419 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 2420 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2421 2422 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 2423 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2424 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 2425 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2426 2427 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 2428 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2429 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 2430 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2431 2432 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 2433 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2434 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 2435 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2436 2437 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 2438 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2439 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 2440 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2441 2442 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, 2443 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2444 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, 2445 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2446 2447 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, 2448 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2449 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, 2450 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2451 } 2452 2453 static void gaudi_init_hbm_cred(struct hl_device *hdev) 2454 { 2455 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; 2456 2457 if (hdev->asic_prop.fw_security_enabled) 2458 return; 2459 2460 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2461 CPU_BOOT_DEV_STS0_HBM_CRED_EN) 2462 return; 2463 2464 hbm0_wr = 0x33333333; 2465 hbm0_rd = 0x77777777; 2466 
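	/*
	 * HBM1 uses different write/read credit patterns than HBM0. All four
	 * values are programmed into each DMA_IF instance (E_N, E_S, W_N and
	 * W_S) below, and read/write credit counting is then enabled via the
	 * two HBM_CRED_EN registers of every instance.
	 */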
hbm1_wr = 0x55555555; 2467 hbm1_rd = 0xDDDDDDDD; 2468 2469 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); 2470 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); 2471 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); 2472 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); 2473 2474 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); 2475 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); 2476 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); 2477 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); 2478 2479 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); 2480 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); 2481 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); 2482 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); 2483 2484 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); 2485 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); 2486 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); 2487 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); 2488 2489 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, 2490 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2491 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2492 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, 2493 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2494 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2495 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, 2496 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2497 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2498 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, 2499 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2500 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2501 2502 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, 2503 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2504 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2505 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, 2506 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2507 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2508 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, 2509 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2510 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2511 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, 2512 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2513 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2514 } 2515 2516 static void gaudi_init_golden_registers(struct hl_device *hdev) 2517 { 2518 u32 tpc_offset; 2519 int tpc_id, i; 2520 2521 gaudi_init_e2e(hdev); 2522 gaudi_init_hbm_cred(hdev); 2523 2524 for (tpc_id = 0, tpc_offset = 0; 2525 tpc_id < TPC_NUMBER_OF_ENGINES; 2526 tpc_id++, tpc_offset += TPC_CFG_OFFSET) { 2527 /* Mask all arithmetic interrupts from TPC */ 2528 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); 2529 /* Set 16 cache lines */ 2530 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, 2531 ICACHE_FETCH_LINE_NUM, 2); 2532 } 2533 2534 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ 2535 for (i = 0 ; i < 128 ; i += 8) 2536 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); 2537 2538 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2539 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2540 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2541 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2542 } 2543 2544 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 2545 int qman_id, dma_addr_t qman_pq_addr) 2546 { 2547 struct cpu_dyn_regs *dyn_regs = 2548 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2549 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2550 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2551 u32 q_off, dma_qm_offset; 2552 u32 dma_qm_err_cfg, irq_handler_offset; 2553 2554 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2555 2556 mtr_base_en_lo = 
lower_32_bits(CFG_BASE + 2557 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2558 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2559 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2560 so_base_en_lo = lower_32_bits(CFG_BASE + 2561 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2562 so_base_en_hi = upper_32_bits(CFG_BASE + 2563 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2564 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2565 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2566 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2567 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2568 so_base_ws_lo = lower_32_bits(CFG_BASE + 2569 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2570 so_base_ws_hi = upper_32_bits(CFG_BASE + 2571 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2572 2573 q_off = dma_qm_offset + qman_id * 4; 2574 2575 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 2576 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 2577 2578 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 2579 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2580 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2581 2582 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 2583 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2584 QMAN_LDMA_SRC_OFFSET); 2585 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2586 QMAN_LDMA_DST_OFFSET); 2587 2588 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2589 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2590 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2591 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2592 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 2593 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 2594 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 2595 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 2596 2597 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 2598 2599 /* The following configuration is needed only once per QMAN */ 2600 if (qman_id == 0) { 2601 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2602 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2603 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2604 2605 /* Configure RAZWI IRQ */ 2606 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2607 if (hdev->stop_on_err) 2608 dma_qm_err_cfg |= 2609 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2610 2611 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2612 2613 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2614 lower_32_bits(CFG_BASE + irq_handler_offset)); 2615 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2616 upper_32_bits(CFG_BASE + irq_handler_offset)); 2617 2618 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2619 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2620 dma_id); 2621 2622 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2623 QM_ARB_ERR_MSG_EN_MASK); 2624 2625 /* Set timeout to maximum */ 2626 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2627 2628 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2629 QMAN_EXTERNAL_MAKE_TRUSTED); 2630 2631 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2632 } 2633 } 2634 2635 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) 2636 { 2637 struct cpu_dyn_regs *dyn_regs = 2638 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2639 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; 2640 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 2641 u32 irq_handler_offset; 2642 2643 /* Set to maximum possible according to physical size */ 2644 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); 2645 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); 2646 2647 /* WA for H/W bug H3-2116 */ 2648 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15); 2649 2650 /* STOP_ON bit implies no completion to operation in case of RAZWI */ 2651 if (hdev->stop_on_err) 2652 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; 2653 2654 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); 2655 2656 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2657 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2658 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 2659 2660 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, 2661 lower_32_bits(CFG_BASE + irq_handler_offset)); 2662 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, 2663 upper_32_bits(CFG_BASE + irq_handler_offset)); 2664 2665 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, 2666 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); 2667 WREG32(mmDMA0_CORE_PROT + dma_offset, 2668 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); 2669 /* If the channel is secured, it should be in MMU bypass mode */ 2670 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, 2671 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); 2672 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); 2673 } 2674 2675 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, 2676 u32 enable_mask) 2677 { 2678 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2679 2680 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); 2681 } 2682 2683 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) 2684 { 2685 struct gaudi_device *gaudi = hdev->asic_specific; 2686 struct hl_hw_queue *q; 2687 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; 2688 2689 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) 2690 return; 2691 2692 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2693 dma_id = gaudi_dma_assignment[i]; 2694 /* 2695 * For queues after the CPU Q need to add 1 to get the correct 2696 * queue. 
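 * (As a hypothetical example, assuming the CPU queue sits right after the
 * two PCI DMA channels in the kernel queue array, a dma_id of 2 with
 * stream 0 would give q_idx = 4 * 2 + 0 + 1 = 9, the first kernel queue
 * after the CPU queue.)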
In addition, need to add the CPU EQ and NIC IRQs in 2697 * order to get the correct MSI register. 2698 */ 2699 if (dma_id > 1) { 2700 cpu_skip = 1; 2701 nic_skip = NIC_NUMBER_OF_ENGINES; 2702 } else { 2703 cpu_skip = 0; 2704 nic_skip = 0; 2705 } 2706 2707 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2708 q_idx = 4 * dma_id + j + cpu_skip; 2709 q = &hdev->kernel_queues[q_idx]; 2710 q->cq_id = cq_id++; 2711 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 2712 gaudi_init_pci_dma_qman(hdev, dma_id, j, 2713 q->bus_address); 2714 } 2715 2716 gaudi_init_dma_core(hdev, dma_id); 2717 2718 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 2719 } 2720 2721 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2722 } 2723 2724 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2725 int qman_id, u64 qman_base_addr) 2726 { 2727 struct cpu_dyn_regs *dyn_regs = 2728 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2729 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2730 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2731 u32 dma_qm_err_cfg, irq_handler_offset; 2732 u32 q_off, dma_qm_offset; 2733 2734 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2735 2736 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2737 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2738 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2739 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2740 so_base_en_lo = lower_32_bits(CFG_BASE + 2741 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2742 so_base_en_hi = upper_32_bits(CFG_BASE + 2743 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2744 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2745 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2746 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2747 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2748 so_base_ws_lo = lower_32_bits(CFG_BASE + 2749 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2750 so_base_ws_hi = upper_32_bits(CFG_BASE + 2751 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2752 2753 q_off = dma_qm_offset + qman_id * 4; 2754 2755 if (qman_id < 4) { 2756 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, 2757 lower_32_bits(qman_base_addr)); 2758 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, 2759 upper_32_bits(qman_base_addr)); 2760 2761 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); 2762 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2763 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2764 2765 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2766 QMAN_CPDMA_SIZE_OFFSET); 2767 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2768 QMAN_CPDMA_SRC_OFFSET); 2769 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2770 QMAN_CPDMA_DST_OFFSET); 2771 } else { 2772 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2773 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2774 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2775 2776 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2777 QMAN_LDMA_SIZE_OFFSET); 2778 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2779 QMAN_LDMA_SRC_OFFSET); 2780 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2781 QMAN_LDMA_DST_OFFSET); 2782 2783 /* Configure RAZWI IRQ */ 2784 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2785 if (hdev->stop_on_err) 2786 dma_qm_err_cfg |= 2787 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2788 2789 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2790 2791 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2792 lower_32_bits(CFG_BASE + irq_handler_offset)); 2793 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2794 upper_32_bits(CFG_BASE + irq_handler_offset)); 2795 2796 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2797 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2798 dma_id); 2799 2800 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2801 QM_ARB_ERR_MSG_EN_MASK); 2802 2803 /* Set timeout to maximum */ 2804 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2805 2806 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2807 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2808 QMAN_INTERNAL_MAKE_TRUSTED); 2809 } 2810 2811 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2812 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2813 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2814 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2815 2816 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */ 2817 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) { 2818 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 2819 mtr_base_ws_lo); 2820 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 2821 mtr_base_ws_hi); 2822 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 2823 so_base_ws_lo); 2824 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 2825 so_base_ws_hi); 2826 } 2827 } 2828 2829 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) 2830 { 2831 struct gaudi_device *gaudi = hdev->asic_specific; 2832 struct gaudi_internal_qman_info *q; 2833 u64 qman_base_addr; 2834 int i, j, dma_id, internal_q_index; 2835 2836 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) 2837 return; 2838 2839 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 2840 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; 2841 2842 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2843 /* 2844 * Add the CPU queue in order to get the correct queue 2845 * number as all internal queue are placed after it 2846 */ 2847 internal_q_index = dma_id * QMAN_STREAMS + j + 1; 2848 2849 q = &gaudi->internal_qmans[internal_q_index]; 2850 qman_base_addr = (u64) q->pq_dma_addr; 2851 gaudi_init_hbm_dma_qman(hdev, dma_id, j, 2852 qman_base_addr); 2853 } 2854 2855 /* Initializing lower CP for HBM DMA QMAN */ 2856 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); 2857 2858 gaudi_init_dma_core(hdev, dma_id); 2859 2860 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); 2861 } 2862 2863 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; 2864 } 2865 2866 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, 2867 int qman_id, u64 qman_base_addr) 2868 { 2869 struct cpu_dyn_regs *dyn_regs = 2870 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2871 u32 mtr_base_lo, mtr_base_hi; 2872 u32 so_base_lo, so_base_hi; 2873 u32 irq_handler_offset; 2874 u32 q_off, mme_id; 2875 
u32 mme_qm_err_cfg; 2876 2877 mtr_base_lo = lower_32_bits(CFG_BASE + 2878 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2879 mtr_base_hi = upper_32_bits(CFG_BASE + 2880 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2881 so_base_lo = lower_32_bits(CFG_BASE + 2882 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2883 so_base_hi = upper_32_bits(CFG_BASE + 2884 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2885 2886 q_off = mme_offset + qman_id * 4; 2887 2888 if (qman_id < 4) { 2889 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, 2890 lower_32_bits(qman_base_addr)); 2891 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, 2892 upper_32_bits(qman_base_addr)); 2893 2894 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); 2895 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); 2896 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); 2897 2898 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2899 QMAN_CPDMA_SIZE_OFFSET); 2900 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2901 QMAN_CPDMA_SRC_OFFSET); 2902 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2903 QMAN_CPDMA_DST_OFFSET); 2904 } else { 2905 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2906 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2907 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 2908 2909 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2910 QMAN_LDMA_SIZE_OFFSET); 2911 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2912 QMAN_LDMA_SRC_OFFSET); 2913 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2914 QMAN_LDMA_DST_OFFSET); 2915 2916 /* Configure RAZWI IRQ */ 2917 mme_id = mme_offset / 2918 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2; 2919 2920 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2921 if (hdev->stop_on_err) 2922 mme_qm_err_cfg |= 2923 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2924 2925 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); 2926 2927 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, 2928 lower_32_bits(CFG_BASE + irq_handler_offset)); 2929 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, 2930 upper_32_bits(CFG_BASE + irq_handler_offset)); 2931 2932 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, 2933 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + 2934 mme_id); 2935 2936 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, 2937 QM_ARB_ERR_MSG_EN_MASK); 2938 2939 /* Set timeout to maximum */ 2940 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); 2941 2942 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); 2943 WREG32(mmMME0_QM_GLBL_PROT + mme_offset, 2944 QMAN_INTERNAL_MAKE_TRUSTED); 2945 } 2946 2947 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 2948 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 2949 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 2950 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 2951 } 2952 2953 static void gaudi_init_mme_qmans(struct hl_device *hdev) 2954 { 2955 struct gaudi_device *gaudi = hdev->asic_specific; 2956 struct gaudi_internal_qman_info *q; 2957 u64 qman_base_addr; 2958 u32 mme_offset; 2959 int i, internal_q_index; 2960 2961 if (gaudi->hw_cap_initialized & HW_CAP_MME) 2962 return; 2963 2964 /* 2965 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) 2966 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) 2967 */ 2968 2969 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2970 2971 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { 2972 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; 2973 q = &gaudi->internal_qmans[internal_q_index]; 2974 qman_base_addr = (u64) q->pq_dma_addr; 
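			/*
			 * The first four streams (MME_0_X) are programmed at the
			 * MME2 QMAN offset; once i reaches 3, mme_offset is reset
			 * to 0 so the remaining streams (MME_1_X) program MME0,
			 * matching the N_W/S_W mapping noted above.
			 */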
2975 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), 2976 qman_base_addr); 2977 if (i == 3) 2978 mme_offset = 0; 2979 } 2980 2981 /* Initializing lower CP for MME QMANs */ 2982 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2983 gaudi_init_mme_qman(hdev, mme_offset, 4, 0); 2984 gaudi_init_mme_qman(hdev, 0, 4, 0); 2985 2986 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2987 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2988 2989 gaudi->hw_cap_initialized |= HW_CAP_MME; 2990 } 2991 2992 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, 2993 int qman_id, u64 qman_base_addr) 2994 { 2995 struct cpu_dyn_regs *dyn_regs = 2996 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2997 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2998 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2999 u32 tpc_qm_err_cfg, irq_handler_offset; 3000 u32 q_off, tpc_id; 3001 3002 mtr_base_en_lo = lower_32_bits(CFG_BASE + 3003 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3004 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3005 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3006 so_base_en_lo = lower_32_bits(CFG_BASE + 3007 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3008 so_base_en_hi = upper_32_bits(CFG_BASE + 3009 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3010 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 3011 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3012 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3013 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3014 so_base_ws_lo = lower_32_bits(CFG_BASE + 3015 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3016 so_base_ws_hi = upper_32_bits(CFG_BASE + 3017 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3018 3019 q_off = tpc_offset + qman_id * 4; 3020 3021 tpc_id = tpc_offset / 3022 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); 3023 3024 if (qman_id < 4) { 3025 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, 3026 lower_32_bits(qman_base_addr)); 3027 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, 3028 upper_32_bits(qman_base_addr)); 3029 3030 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); 3031 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); 3032 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); 3033 3034 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3035 QMAN_CPDMA_SIZE_OFFSET); 3036 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3037 QMAN_CPDMA_SRC_OFFSET); 3038 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3039 QMAN_CPDMA_DST_OFFSET); 3040 } else { 3041 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3042 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3043 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 3044 3045 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3046 QMAN_LDMA_SIZE_OFFSET); 3047 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3048 QMAN_LDMA_SRC_OFFSET); 3049 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3050 QMAN_LDMA_DST_OFFSET); 3051 3052 /* Configure RAZWI IRQ */ 3053 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3054 if (hdev->stop_on_err) 3055 tpc_qm_err_cfg |= 3056 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3057 3058 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); 3059 3060 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, 3061 lower_32_bits(CFG_BASE + irq_handler_offset)); 3062 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, 3063 upper_32_bits(CFG_BASE + irq_handler_offset)); 3064 3065 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, 3066 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + 3067 tpc_id); 3068 3069 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, 3070 QM_ARB_ERR_MSG_EN_MASK); 3071 3072 /* Set timeout to maximum */ 3073 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); 3074 3075 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); 3076 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, 3077 QMAN_INTERNAL_MAKE_TRUSTED); 3078 } 3079 3080 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3081 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3082 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3083 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3084 3085 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */ 3086 if (tpc_id == 6) { 3087 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 3088 mtr_base_ws_lo); 3089 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 3090 mtr_base_ws_hi); 3091 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 3092 so_base_ws_lo); 3093 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 3094 so_base_ws_hi); 3095 } 3096 } 3097 3098 static void gaudi_init_tpc_qmans(struct hl_device *hdev) 3099 { 3100 struct gaudi_device *gaudi = hdev->asic_specific; 3101 struct gaudi_internal_qman_info *q; 3102 u64 qman_base_addr; 3103 u32 so_base_hi, tpc_offset = 0; 3104 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - 3105 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; 3106 int i, tpc_id, internal_q_index; 3107 3108 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) 3109 return; 3110 3111 so_base_hi = upper_32_bits(CFG_BASE + 3112 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3113 3114 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3115 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3116 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + 3117 tpc_id * QMAN_STREAMS + i; 3118 q = &gaudi->internal_qmans[internal_q_index]; 3119 qman_base_addr = (u64) q->pq_dma_addr; 3120 gaudi_init_tpc_qman(hdev, tpc_offset, i, 3121 qman_base_addr); 3122 3123 if (i == 3) { 3124 /* Initializing lower CP for TPC QMAN */ 3125 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); 3126 3127 /* Enable the QMAN and TPC channel */ 3128 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 3129 QMAN_TPC_ENABLE); 3130 } 3131 } 3132 3133 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, 3134 so_base_hi); 3135 3136 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3137 3138 gaudi->hw_cap_initialized |= 3139 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id); 3140 } 3141 } 3142 3143 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, 3144 int qman_id, u64 qman_base_addr, int nic_id) 3145 { 3146 
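	/*
	 * Unlike the DMA/MME/TPC QMANs, every NIC stream is configured with
	 * the LDMA offsets (there is no separate CPDMA path here), and the
	 * per-QMAN error and arbitration registers are written only once,
	 * for qman_id 0, further down.
	 */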
struct cpu_dyn_regs *dyn_regs = 3147 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3148 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3149 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3150 u32 nic_qm_err_cfg, irq_handler_offset; 3151 u32 q_off; 3152 3153 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3154 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3155 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3156 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3157 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3158 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3159 so_base_en_hi = upper_32_bits(CFG_BASE + 3160 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3161 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3162 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3163 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3164 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3165 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3166 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3167 so_base_ws_hi = upper_32_bits(CFG_BASE + 3168 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3169 3170 q_off = nic_offset + qman_id * 4; 3171 3172 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr)); 3173 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr)); 3174 3175 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH)); 3176 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0); 3177 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0); 3178 3179 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3180 QMAN_LDMA_SIZE_OFFSET); 3181 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3182 QMAN_LDMA_SRC_OFFSET); 3183 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3184 QMAN_LDMA_DST_OFFSET); 3185 3186 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3187 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3188 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3189 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3190 3191 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */ 3192 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 3193 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 3194 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 3195 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 3196 3197 if (qman_id == 0) { 3198 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3199 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3200 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 3201 3202 /* Configure RAZWI IRQ */ 3203 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3204 if (hdev->stop_on_err) 3205 nic_qm_err_cfg |= 3206 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3207 3208 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg); 3209 3210 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset, 3211 lower_32_bits(CFG_BASE + irq_handler_offset)); 3212 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset, 3213 upper_32_bits(CFG_BASE + irq_handler_offset)); 3214 3215 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset, 3216 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id + 3217 nic_id); 3218 3219 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, 3220 QM_ARB_ERR_MSG_EN_MASK); 3221 3222 /* Set timeout to maximum */ 3223 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); 3224 3225 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); 3226 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, 3227 QMAN_INTERNAL_MAKE_TRUSTED); 3228 } 3229 } 3230 3231 static void gaudi_init_nic_qmans(struct hl_device *hdev) 3232 { 3233 struct gaudi_device *gaudi = hdev->asic_specific; 3234 struct gaudi_internal_qman_info *q; 3235 u64 qman_base_addr; 3236 u32 nic_offset = 0; 3237 u32 nic_delta_between_qmans = 3238 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3239 u32 nic_delta_between_nics = 3240 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3241 int i, nic_id, internal_q_index; 3242 3243 if (!hdev->nic_ports_mask) 3244 return; 3245 3246 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK) 3247 return; 3248 3249 dev_dbg(hdev->dev, "Initializing NIC QMANs\n"); 3250 3251 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3252 if (!(hdev->nic_ports_mask & (1 << nic_id))) { 3253 nic_offset += nic_delta_between_qmans; 3254 if (nic_id & 1) { 3255 nic_offset -= (nic_delta_between_qmans * 2); 3256 nic_offset += nic_delta_between_nics; 3257 } 3258 continue; 3259 } 3260 3261 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3262 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 + 3263 nic_id * QMAN_STREAMS + i; 3264 q = &gaudi->internal_qmans[internal_q_index]; 3265 qman_base_addr = (u64) q->pq_dma_addr; 3266 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3), 3267 qman_base_addr, nic_id); 3268 } 3269 3270 /* Enable the QMAN */ 3271 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE); 3272 3273 nic_offset += nic_delta_between_qmans; 3274 if (nic_id & 1) { 3275 nic_offset -= (nic_delta_between_qmans * 2); 3276 nic_offset += nic_delta_between_nics; 3277 } 3278 3279 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id); 3280 } 3281 } 3282 3283 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) 3284 { 3285 struct gaudi_device *gaudi = hdev->asic_specific; 3286 3287 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3288 return; 3289 3290 WREG32(mmDMA0_QM_GLBL_CFG0, 0); 3291 WREG32(mmDMA1_QM_GLBL_CFG0, 0); 3292 WREG32(mmDMA5_QM_GLBL_CFG0, 0); 3293 } 3294 3295 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) 3296 { 3297 struct gaudi_device *gaudi = hdev->asic_specific; 3298 3299 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3300 return; 3301 3302 WREG32(mmDMA2_QM_GLBL_CFG0, 0); 3303 WREG32(mmDMA3_QM_GLBL_CFG0, 0); 3304 WREG32(mmDMA4_QM_GLBL_CFG0, 0); 3305 WREG32(mmDMA6_QM_GLBL_CFG0, 0); 3306 WREG32(mmDMA7_QM_GLBL_CFG0, 0); 3307 } 3308 3309 static void gaudi_disable_mme_qmans(struct hl_device *hdev) 3310 { 3311 struct gaudi_device *gaudi = hdev->asic_specific; 3312 3313 if 
(!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3314 return; 3315 3316 WREG32(mmMME2_QM_GLBL_CFG0, 0); 3317 WREG32(mmMME0_QM_GLBL_CFG0, 0); 3318 } 3319 3320 static void gaudi_disable_tpc_qmans(struct hl_device *hdev) 3321 { 3322 struct gaudi_device *gaudi = hdev->asic_specific; 3323 u32 tpc_offset = 0; 3324 int tpc_id; 3325 3326 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3327 return; 3328 3329 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3330 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); 3331 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3332 } 3333 } 3334 3335 static void gaudi_disable_nic_qmans(struct hl_device *hdev) 3336 { 3337 struct gaudi_device *gaudi = hdev->asic_specific; 3338 u32 nic_mask, nic_offset = 0; 3339 u32 nic_delta_between_qmans = 3340 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3341 u32 nic_delta_between_nics = 3342 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3343 int nic_id; 3344 3345 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3346 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id); 3347 3348 if (gaudi->hw_cap_initialized & nic_mask) 3349 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0); 3350 3351 nic_offset += nic_delta_between_qmans; 3352 if (nic_id & 1) { 3353 nic_offset -= (nic_delta_between_qmans * 2); 3354 nic_offset += nic_delta_between_nics; 3355 } 3356 } 3357 } 3358 3359 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) 3360 { 3361 struct gaudi_device *gaudi = hdev->asic_specific; 3362 3363 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3364 return; 3365 3366 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ 3367 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3368 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3369 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3370 } 3371 3372 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) 3373 { 3374 struct gaudi_device *gaudi = hdev->asic_specific; 3375 3376 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3377 return; 3378 3379 /* Stop CPs of HBM DMA QMANs */ 3380 3381 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3382 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3383 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3384 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3385 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3386 } 3387 3388 static void gaudi_stop_mme_qmans(struct hl_device *hdev) 3389 { 3390 struct gaudi_device *gaudi = hdev->asic_specific; 3391 3392 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3393 return; 3394 3395 /* Stop CPs of MME QMANs */ 3396 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3397 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3398 } 3399 3400 static void gaudi_stop_tpc_qmans(struct hl_device *hdev) 3401 { 3402 struct gaudi_device *gaudi = hdev->asic_specific; 3403 3404 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3405 return; 3406 3407 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3408 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3409 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3410 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3411 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3412 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3413 
WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3414 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3415 } 3416 3417 static void gaudi_stop_nic_qmans(struct hl_device *hdev) 3418 { 3419 struct gaudi_device *gaudi = hdev->asic_specific; 3420 3421 /* Stop upper CPs of QMANs */ 3422 3423 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) 3424 WREG32(mmNIC0_QM0_GLBL_CFG1, 3425 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3426 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3427 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3428 3429 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) 3430 WREG32(mmNIC0_QM1_GLBL_CFG1, 3431 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3432 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3433 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3434 3435 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) 3436 WREG32(mmNIC1_QM0_GLBL_CFG1, 3437 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3438 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3439 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3440 3441 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) 3442 WREG32(mmNIC1_QM1_GLBL_CFG1, 3443 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3444 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3445 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3446 3447 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) 3448 WREG32(mmNIC2_QM0_GLBL_CFG1, 3449 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3450 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3451 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3452 3453 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) 3454 WREG32(mmNIC2_QM1_GLBL_CFG1, 3455 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3456 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3457 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3458 3459 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) 3460 WREG32(mmNIC3_QM0_GLBL_CFG1, 3461 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3462 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3463 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3464 3465 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) 3466 WREG32(mmNIC3_QM1_GLBL_CFG1, 3467 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3468 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3469 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3470 3471 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) 3472 WREG32(mmNIC4_QM0_GLBL_CFG1, 3473 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3474 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3475 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3476 3477 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) 3478 WREG32(mmNIC4_QM1_GLBL_CFG1, 3479 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3480 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3481 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3482 } 3483 3484 static void gaudi_pci_dma_stall(struct hl_device *hdev) 3485 { 3486 struct gaudi_device *gaudi = hdev->asic_specific; 3487 3488 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3489 return; 3490 3491 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3492 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3493 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3494 } 3495 3496 static void gaudi_hbm_dma_stall(struct hl_device *hdev) 3497 { 3498 struct gaudi_device *gaudi = hdev->asic_specific; 3499 3500 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3501 return; 3502 3503 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3504 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3505 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3506 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3507 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3508 } 3509 3510 static void gaudi_mme_stall(struct hl_device *hdev) 3511 { 3512 struct gaudi_device *gaudi = hdev->asic_specific; 3513 3514 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3515 return; 3516 
3517 /* WA for H3-1800 bug: do ACC and SBAB writes twice */ 3518 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3519 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3520 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3521 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3522 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3523 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3524 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3525 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3526 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3527 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3528 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3529 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3530 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3531 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3532 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3533 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3534 } 3535 3536 static void gaudi_tpc_stall(struct hl_device *hdev) 3537 { 3538 struct gaudi_device *gaudi = hdev->asic_specific; 3539 3540 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3541 return; 3542 3543 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3544 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3545 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3546 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3547 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3548 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3549 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3550 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3551 } 3552 3553 static void gaudi_disable_clock_gating(struct hl_device *hdev) 3554 { 3555 u32 qman_offset; 3556 int i; 3557 3558 if (hdev->asic_prop.fw_security_enabled) 3559 return; 3560 3561 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 3562 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); 3563 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); 3564 3565 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); 3566 } 3567 3568 WREG32(mmMME0_QM_CGM_CFG, 0); 3569 WREG32(mmMME0_QM_CGM_CFG1, 0); 3570 WREG32(mmMME2_QM_CGM_CFG, 0); 3571 WREG32(mmMME2_QM_CGM_CFG1, 0); 3572 3573 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 3574 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); 3575 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); 3576 3577 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); 3578 } 3579 } 3580 3581 static void gaudi_enable_timestamp(struct hl_device *hdev) 3582 { 3583 /* Disable the timestamp counter */ 3584 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3585 3586 /* Zero the lower/upper parts of the 64-bit counter */ 3587 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); 3588 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); 3589 3590 /* Enable the counter */ 3591 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); 3592 } 3593 3594 static void gaudi_disable_timestamp(struct hl_device *hdev) 3595 { 3596 /* Disable the timestamp counter */ 3597 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3598 } 3599 3600 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3601 { 3602 u32 wait_timeout_ms; 3603 3604 if (hdev->pldm) 3605 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 3606 else 3607 
wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; 3608 3609 if (fw_reset) 3610 goto skip_engines; 3611 3612 gaudi_stop_nic_qmans(hdev); 3613 gaudi_stop_mme_qmans(hdev); 3614 gaudi_stop_tpc_qmans(hdev); 3615 gaudi_stop_hbm_dma_qmans(hdev); 3616 gaudi_stop_pci_dma_qmans(hdev); 3617 3618 msleep(wait_timeout_ms); 3619 3620 gaudi_pci_dma_stall(hdev); 3621 gaudi_hbm_dma_stall(hdev); 3622 gaudi_tpc_stall(hdev); 3623 gaudi_mme_stall(hdev); 3624 3625 msleep(wait_timeout_ms); 3626 3627 gaudi_disable_nic_qmans(hdev); 3628 gaudi_disable_mme_qmans(hdev); 3629 gaudi_disable_tpc_qmans(hdev); 3630 gaudi_disable_hbm_dma_qmans(hdev); 3631 gaudi_disable_pci_dma_qmans(hdev); 3632 3633 gaudi_disable_timestamp(hdev); 3634 3635 skip_engines: 3636 gaudi_disable_msi(hdev); 3637 } 3638 3639 static int gaudi_mmu_init(struct hl_device *hdev) 3640 { 3641 struct asic_fixed_properties *prop = &hdev->asic_prop; 3642 struct gaudi_device *gaudi = hdev->asic_specific; 3643 u64 hop0_addr; 3644 int rc, i; 3645 3646 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 3647 return 0; 3648 3649 for (i = 0 ; i < prop->max_asid ; i++) { 3650 hop0_addr = prop->mmu_pgt_addr + 3651 (i * prop->mmu_hop_table_size); 3652 3653 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); 3654 if (rc) { 3655 dev_err(hdev->dev, 3656 "failed to set hop0 addr for asid %d\n", i); 3657 return rc; 3658 } 3659 } 3660 3661 /* init MMU cache manage page */ 3662 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8); 3663 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40); 3664 3665 /* mem cache invalidation */ 3666 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); 3667 3668 rc = hl_mmu_invalidate_cache(hdev, true, 0); 3669 if (rc) 3670 return rc; 3671 3672 WREG32(mmMMU_UP_MMU_ENABLE, 1); 3673 WREG32(mmMMU_UP_SPI_MASK, 0xF); 3674 3675 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440); 3676 3677 /* 3678 * The H/W expects the first PI after init to be 1. After wraparound 3679 * we'll write 0. 
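	 * (The PI here is the producer index of the MMU cache-invalidation
	 * requests, tracked in gaudi->mmu_cache_inv_pi, hence it is seeded
	 * to 1 below.)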
3680 */ 3681 gaudi->mmu_cache_inv_pi = 1; 3682 3683 gaudi->hw_cap_initialized |= HW_CAP_MMU; 3684 3685 return 0; 3686 } 3687 3688 static int gaudi_load_firmware_to_device(struct hl_device *hdev) 3689 { 3690 void __iomem *dst; 3691 3692 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET; 3693 3694 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0); 3695 } 3696 3697 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev) 3698 { 3699 void __iomem *dst; 3700 3701 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET; 3702 3703 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0); 3704 } 3705 3706 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev) 3707 { 3708 struct dynamic_fw_load_mgr *dynamic_loader; 3709 struct cpu_dyn_regs *dyn_regs; 3710 3711 dynamic_loader = &hdev->fw_loader.dynamic_loader; 3712 3713 /* 3714 * here we update initial values for few specific dynamic regs (as 3715 * before reading the first descriptor from FW those value has to be 3716 * hard-coded) in later stages of the protocol those values will be 3717 * updated automatically by reading the FW descriptor so data there 3718 * will always be up-to-date 3719 */ 3720 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; 3721 dyn_regs->kmd_msg_to_cpu = 3722 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); 3723 dyn_regs->cpu_cmd_status_to_host = 3724 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); 3725 3726 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC; 3727 } 3728 3729 static void gaudi_init_static_firmware_loader(struct hl_device *hdev) 3730 { 3731 struct static_fw_load_mgr *static_loader; 3732 3733 static_loader = &hdev->fw_loader.static_loader; 3734 3735 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3736 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3737 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU; 3738 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST; 3739 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3740 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0; 3741 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1; 3742 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0; 3743 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1; 3744 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET; 3745 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET; 3746 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR)); 3747 static_loader->cpu_reset_wait_msec = hdev->pldm ? 
3748 GAUDI_PLDM_RESET_WAIT_MSEC : 3749 GAUDI_CPU_RESET_WAIT_MSEC; 3750 } 3751 3752 static void gaudi_init_firmware_preload_params(struct hl_device *hdev) 3753 { 3754 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 3755 3756 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3757 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 3758 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 3759 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 3760 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 3761 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3762 } 3763 3764 static void gaudi_init_firmware_loader(struct hl_device *hdev) 3765 { 3766 struct asic_fixed_properties *prop = &hdev->asic_prop; 3767 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 3768 3769 /* fill common fields */ 3770 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 3771 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE; 3772 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE; 3773 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC; 3774 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3775 fw_loader->skip_bmc = !hdev->bmc_enable; 3776 fw_loader->sram_bar_id = SRAM_BAR_ID; 3777 fw_loader->dram_bar_id = HBM_BAR_ID; 3778 3779 if (prop->dynamic_fw_load) 3780 gaudi_init_dynamic_firmware_loader(hdev); 3781 else 3782 gaudi_init_static_firmware_loader(hdev); 3783 } 3784 3785 static int gaudi_init_cpu(struct hl_device *hdev) 3786 { 3787 struct gaudi_device *gaudi = hdev->asic_specific; 3788 int rc; 3789 3790 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 3791 return 0; 3792 3793 if (gaudi->hw_cap_initialized & HW_CAP_CPU) 3794 return 0; 3795 3796 /* 3797 * The device CPU works with 40 bits addresses. 3798 * This register sets the extension to 50 bits. 
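	 * Note that the write below is performed only when FW security is
	 * disabled (see the fw_security_enabled check).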
3799 */ 3800 if (!hdev->asic_prop.fw_security_enabled) 3801 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 3802 3803 rc = hl_fw_init_cpu(hdev); 3804 3805 if (rc) 3806 return rc; 3807 3808 gaudi->hw_cap_initialized |= HW_CAP_CPU; 3809 3810 return 0; 3811 } 3812 3813 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3814 { 3815 struct cpu_dyn_regs *dyn_regs = 3816 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3817 struct asic_fixed_properties *prop = &hdev->asic_prop; 3818 struct gaudi_device *gaudi = hdev->asic_specific; 3819 u32 status, irq_handler_offset; 3820 struct hl_eq *eq; 3821 struct hl_hw_queue *cpu_pq = 3822 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 3823 int err; 3824 3825 if (!hdev->cpu_queues_enable) 3826 return 0; 3827 3828 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 3829 return 0; 3830 3831 eq = &hdev->event_queue; 3832 3833 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 3834 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 3835 3836 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 3837 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 3838 3839 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 3840 lower_32_bits(hdev->cpu_accessible_dma_address)); 3841 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 3842 upper_32_bits(hdev->cpu_accessible_dma_address)); 3843 3844 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 3845 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 3846 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 3847 3848 /* Used for EQ CI */ 3849 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 3850 3851 WREG32(mmCPU_IF_PF_PQ_PI, 0); 3852 3853 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 3854 3855 irq_handler_offset = prop->gic_interrupts_enable ? 3856 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3857 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 3858 3859 WREG32(irq_handler_offset, 3860 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 3861 3862 err = hl_poll_timeout( 3863 hdev, 3864 mmCPU_IF_QUEUE_INIT, 3865 status, 3866 (status == PQ_INIT_STATUS_READY_FOR_HOST), 3867 1000, 3868 cpu_timeout); 3869 3870 if (err) { 3871 dev_err(hdev->dev, 3872 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 3873 return -EIO; 3874 } 3875 3876 /* update FW application security bits */ 3877 if (prop->fw_cpu_boot_dev_sts0_valid) 3878 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 3879 if (prop->fw_cpu_boot_dev_sts1_valid) 3880 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 3881 3882 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 3883 return 0; 3884 } 3885 3886 static void gaudi_pre_hw_init(struct hl_device *hdev) 3887 { 3888 /* Perform read from the device to make sure device is up */ 3889 RREG32(mmHW_STATE); 3890 3891 if (!hdev->asic_prop.fw_security_enabled) { 3892 /* Set the access through PCI bars (Linux driver only) as 3893 * secured 3894 */ 3895 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 3896 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 3897 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 3898 3899 /* Perform read to flush the waiting writes to ensure 3900 * configuration was set in the device 3901 */ 3902 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 3903 } 3904 3905 /* 3906 * Let's mark in the H/W that we have reached this point. We check 3907 * this value in the reset_before_init function to understand whether 3908 * we need to reset the chip before doing H/W init. 
This register is 3909 * cleared by the H/W upon H/W reset 3910 */ 3911 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 3912 } 3913 3914 static int gaudi_hw_init(struct hl_device *hdev) 3915 { 3916 struct gaudi_device *gaudi = hdev->asic_specific; 3917 int rc; 3918 3919 gaudi_pre_hw_init(hdev); 3920 3921 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 3922 * So we set it here and if anyone tries to move it later to 3923 * a different address, there will be an error 3924 */ 3925 if (hdev->asic_prop.iatu_done_by_fw) 3926 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE; 3927 3928 /* 3929 * Before pushing u-boot/linux to device, need to set the hbm bar to 3930 * base address of dram 3931 */ 3932 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 3933 dev_err(hdev->dev, 3934 "failed to map HBM bar to DRAM base address\n"); 3935 return -EIO; 3936 } 3937 3938 rc = gaudi_init_cpu(hdev); 3939 if (rc) { 3940 dev_err(hdev->dev, "failed to initialize CPU\n"); 3941 return rc; 3942 } 3943 3944 /* In case the clock gating was enabled in preboot we need to disable 3945 * it here before touching the MME/TPC registers. 3946 */ 3947 gaudi_disable_clock_gating(hdev); 3948 3949 /* SRAM scrambler must be initialized after CPU is running from HBM */ 3950 gaudi_init_scrambler_sram(hdev); 3951 3952 /* This is here just in case we are working without CPU */ 3953 gaudi_init_scrambler_hbm(hdev); 3954 3955 gaudi_init_golden_registers(hdev); 3956 3957 rc = gaudi_mmu_init(hdev); 3958 if (rc) 3959 return rc; 3960 3961 gaudi_init_security(hdev); 3962 3963 gaudi_init_pci_dma_qmans(hdev); 3964 3965 gaudi_init_hbm_dma_qmans(hdev); 3966 3967 gaudi_init_mme_qmans(hdev); 3968 3969 gaudi_init_tpc_qmans(hdev); 3970 3971 gaudi_init_nic_qmans(hdev); 3972 3973 gaudi_enable_timestamp(hdev); 3974 3975 /* MSI must be enabled before CPU queues and NIC are initialized */ 3976 rc = gaudi_enable_msi(hdev); 3977 if (rc) 3978 goto disable_queues; 3979 3980 /* must be called after MSI was enabled */ 3981 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 3982 if (rc) { 3983 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 3984 rc); 3985 goto disable_msi; 3986 } 3987 3988 /* Perform read from the device to flush all configuration */ 3989 RREG32(mmHW_STATE); 3990 3991 return 0; 3992 3993 disable_msi: 3994 gaudi_disable_msi(hdev); 3995 disable_queues: 3996 gaudi_disable_mme_qmans(hdev); 3997 gaudi_disable_pci_dma_qmans(hdev); 3998 3999 return rc; 4000 } 4001 4002 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4003 { 4004 struct cpu_dyn_regs *dyn_regs = 4005 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4006 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset; 4007 struct gaudi_device *gaudi = hdev->asic_specific; 4008 bool driver_performs_reset; 4009 4010 if (!hard_reset) { 4011 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 4012 return 0; 4013 } 4014 4015 if (hdev->pldm) { 4016 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 4017 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 4018 } else { 4019 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; 4020 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 4021 } 4022 4023 if (fw_reset) { 4024 dev_dbg(hdev->dev, 4025 "Firmware performs HARD reset, going to wait %dms\n", 4026 reset_timeout_ms); 4027 4028 goto skip_reset; 4029 } 4030 4031 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && 4032 !hdev->asic_prop.hard_reset_done_by_fw); 4033 4034 /* Set device to handle FLR by H/W as we 
will put the device CPU to 4035 * halt mode 4036 */ 4037 if (driver_performs_reset) 4038 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 4039 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 4040 4041 /* If linux is loaded in the device CPU we need to communicate with it 4042 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU 4043 * registers in case of old F/Ws 4044 */ 4045 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { 4046 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4047 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4048 le32_to_cpu(dyn_regs->gic_host_halt_irq); 4049 4050 WREG32(irq_handler_offset, 4051 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); 4052 4053 /* This is a hail-mary attempt to revive the card in the small chance that the 4054 * f/w has experienced a watchdog event, which caused it to return back to preboot. 4055 * In that case, triggering reset through GIC won't help. We need to trigger the 4056 * reset as if Linux wasn't loaded. 4057 * 4058 * We do it only if the reset cause was HB, because that would be the indication 4059 * of such an event. 4060 * 4061 * In case watchdog hasn't expired but we still got HB, then this won't do any 4062 * damage. 4063 */ 4064 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { 4065 if (hdev->asic_prop.hard_reset_done_by_fw) 4066 hl_fw_ask_hard_reset_without_linux(hdev); 4067 else 4068 hl_fw_ask_halt_machine_without_linux(hdev); 4069 } 4070 } else { 4071 if (hdev->asic_prop.hard_reset_done_by_fw) 4072 hl_fw_ask_hard_reset_without_linux(hdev); 4073 else 4074 hl_fw_ask_halt_machine_without_linux(hdev); 4075 } 4076 4077 if (driver_performs_reset) { 4078 4079 /* Configure the reset registers. Must be done as early as 4080 * possible in case we fail during H/W initialization 4081 */ 4082 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 4083 (CFG_RST_H_DMA_MASK | 4084 CFG_RST_H_MME_MASK | 4085 CFG_RST_H_SM_MASK | 4086 CFG_RST_H_TPC_7_MASK)); 4087 4088 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 4089 4090 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 4091 (CFG_RST_H_HBM_MASK | 4092 CFG_RST_H_TPC_7_MASK | 4093 CFG_RST_H_NIC_MASK | 4094 CFG_RST_H_SM_MASK | 4095 CFG_RST_H_DMA_MASK | 4096 CFG_RST_H_MME_MASK | 4097 CFG_RST_H_CPU_MASK | 4098 CFG_RST_H_MMU_MASK)); 4099 4100 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 4101 (CFG_RST_L_IF_MASK | 4102 CFG_RST_L_PSOC_MASK | 4103 CFG_RST_L_TPC_MASK)); 4104 4105 msleep(cpu_timeout_ms); 4106 4107 /* Tell ASIC not to re-initialize PCIe */ 4108 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 4109 4110 /* Restart BTL/BLR upon hard-reset */ 4111 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 4112 4113 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 4114 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 4115 4116 dev_dbg(hdev->dev, 4117 "Issued HARD reset command, going to wait %dms\n", 4118 reset_timeout_ms); 4119 } else { 4120 dev_dbg(hdev->dev, 4121 "Firmware performs HARD reset, going to wait %dms\n", 4122 reset_timeout_ms); 4123 } 4124 4125 skip_reset: 4126 /* 4127 * After hard reset, we can't poll the BTM_FSM register because the PSOC 4128 * itself is in reset. 
Need to wait until the reset is deasserted 4129 */ 4130 msleep(reset_timeout_ms); 4131 4132 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 4133 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) { 4134 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status); 4135 return -ETIMEDOUT; 4136 } 4137 4138 if (gaudi) { 4139 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | 4140 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | 4141 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | 4142 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | 4143 HW_CAP_HBM_SCRAMBLER); 4144 4145 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 4146 4147 hdev->device_cpu_is_halted = false; 4148 } 4149 return 0; 4150 } 4151 4152 static int gaudi_suspend(struct hl_device *hdev) 4153 { 4154 int rc; 4155 4156 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 4157 if (rc) 4158 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 4159 4160 return rc; 4161 } 4162 4163 static int gaudi_resume(struct hl_device *hdev) 4164 { 4165 return gaudi_init_iatu(hdev); 4166 } 4167 4168 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 4169 void *cpu_addr, dma_addr_t dma_addr, size_t size) 4170 { 4171 int rc; 4172 4173 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4174 VM_DONTCOPY | VM_NORESERVE); 4175 4176 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4177 (dma_addr - HOST_PHYS_BASE), size); 4178 if (rc) 4179 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4180 4181 return rc; 4182 } 4183 4184 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 4185 { 4186 struct cpu_dyn_regs *dyn_regs = 4187 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4188 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset; 4189 struct gaudi_device *gaudi = hdev->asic_specific; 4190 bool invalid_queue = false; 4191 int dma_id; 4192 4193 switch (hw_queue_id) { 4194 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 4195 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 4196 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4197 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4198 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4199 break; 4200 4201 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 4202 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 4203 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4204 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4205 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4206 break; 4207 4208 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 4209 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 4210 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4211 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4212 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4213 break; 4214 4215 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 4216 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 4217 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4218 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4219 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4220 break; 4221 4222 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 4223 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 4224 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4225 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4226 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4227 break; 4228 4229 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 4230 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 4231 dma_qm_offset = dma_id * 
DMA_QMAN_OFFSET; 4232 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4233 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4234 break; 4235 4236 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 4237 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 4238 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4239 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4240 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4241 break; 4242 4243 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 4244 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6]; 4245 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4246 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4247 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4248 break; 4249 4250 case GAUDI_QUEUE_ID_CPU_PQ: 4251 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4252 db_reg_offset = mmCPU_IF_PF_PQ_PI; 4253 else 4254 invalid_queue = true; 4255 break; 4256 4257 case GAUDI_QUEUE_ID_MME_0_0: 4258 db_reg_offset = mmMME2_QM_PQ_PI_0; 4259 break; 4260 4261 case GAUDI_QUEUE_ID_MME_0_1: 4262 db_reg_offset = mmMME2_QM_PQ_PI_1; 4263 break; 4264 4265 case GAUDI_QUEUE_ID_MME_0_2: 4266 db_reg_offset = mmMME2_QM_PQ_PI_2; 4267 break; 4268 4269 case GAUDI_QUEUE_ID_MME_0_3: 4270 db_reg_offset = mmMME2_QM_PQ_PI_3; 4271 break; 4272 4273 case GAUDI_QUEUE_ID_MME_1_0: 4274 db_reg_offset = mmMME0_QM_PQ_PI_0; 4275 break; 4276 4277 case GAUDI_QUEUE_ID_MME_1_1: 4278 db_reg_offset = mmMME0_QM_PQ_PI_1; 4279 break; 4280 4281 case GAUDI_QUEUE_ID_MME_1_2: 4282 db_reg_offset = mmMME0_QM_PQ_PI_2; 4283 break; 4284 4285 case GAUDI_QUEUE_ID_MME_1_3: 4286 db_reg_offset = mmMME0_QM_PQ_PI_3; 4287 break; 4288 4289 case GAUDI_QUEUE_ID_TPC_0_0: 4290 db_reg_offset = mmTPC0_QM_PQ_PI_0; 4291 break; 4292 4293 case GAUDI_QUEUE_ID_TPC_0_1: 4294 db_reg_offset = mmTPC0_QM_PQ_PI_1; 4295 break; 4296 4297 case GAUDI_QUEUE_ID_TPC_0_2: 4298 db_reg_offset = mmTPC0_QM_PQ_PI_2; 4299 break; 4300 4301 case GAUDI_QUEUE_ID_TPC_0_3: 4302 db_reg_offset = mmTPC0_QM_PQ_PI_3; 4303 break; 4304 4305 case GAUDI_QUEUE_ID_TPC_1_0: 4306 db_reg_offset = mmTPC1_QM_PQ_PI_0; 4307 break; 4308 4309 case GAUDI_QUEUE_ID_TPC_1_1: 4310 db_reg_offset = mmTPC1_QM_PQ_PI_1; 4311 break; 4312 4313 case GAUDI_QUEUE_ID_TPC_1_2: 4314 db_reg_offset = mmTPC1_QM_PQ_PI_2; 4315 break; 4316 4317 case GAUDI_QUEUE_ID_TPC_1_3: 4318 db_reg_offset = mmTPC1_QM_PQ_PI_3; 4319 break; 4320 4321 case GAUDI_QUEUE_ID_TPC_2_0: 4322 db_reg_offset = mmTPC2_QM_PQ_PI_0; 4323 break; 4324 4325 case GAUDI_QUEUE_ID_TPC_2_1: 4326 db_reg_offset = mmTPC2_QM_PQ_PI_1; 4327 break; 4328 4329 case GAUDI_QUEUE_ID_TPC_2_2: 4330 db_reg_offset = mmTPC2_QM_PQ_PI_2; 4331 break; 4332 4333 case GAUDI_QUEUE_ID_TPC_2_3: 4334 db_reg_offset = mmTPC2_QM_PQ_PI_3; 4335 break; 4336 4337 case GAUDI_QUEUE_ID_TPC_3_0: 4338 db_reg_offset = mmTPC3_QM_PQ_PI_0; 4339 break; 4340 4341 case GAUDI_QUEUE_ID_TPC_3_1: 4342 db_reg_offset = mmTPC3_QM_PQ_PI_1; 4343 break; 4344 4345 case GAUDI_QUEUE_ID_TPC_3_2: 4346 db_reg_offset = mmTPC3_QM_PQ_PI_2; 4347 break; 4348 4349 case GAUDI_QUEUE_ID_TPC_3_3: 4350 db_reg_offset = mmTPC3_QM_PQ_PI_3; 4351 break; 4352 4353 case GAUDI_QUEUE_ID_TPC_4_0: 4354 db_reg_offset = mmTPC4_QM_PQ_PI_0; 4355 break; 4356 4357 case GAUDI_QUEUE_ID_TPC_4_1: 4358 db_reg_offset = mmTPC4_QM_PQ_PI_1; 4359 break; 4360 4361 case GAUDI_QUEUE_ID_TPC_4_2: 4362 db_reg_offset = mmTPC4_QM_PQ_PI_2; 4363 break; 4364 4365 case GAUDI_QUEUE_ID_TPC_4_3: 4366 db_reg_offset = mmTPC4_QM_PQ_PI_3; 4367 break; 4368 4369 case GAUDI_QUEUE_ID_TPC_5_0: 4370 db_reg_offset = mmTPC5_QM_PQ_PI_0; 4371 break; 4372 4373 case 
GAUDI_QUEUE_ID_TPC_5_1: 4374 db_reg_offset = mmTPC5_QM_PQ_PI_1; 4375 break; 4376 4377 case GAUDI_QUEUE_ID_TPC_5_2: 4378 db_reg_offset = mmTPC5_QM_PQ_PI_2; 4379 break; 4380 4381 case GAUDI_QUEUE_ID_TPC_5_3: 4382 db_reg_offset = mmTPC5_QM_PQ_PI_3; 4383 break; 4384 4385 case GAUDI_QUEUE_ID_TPC_6_0: 4386 db_reg_offset = mmTPC6_QM_PQ_PI_0; 4387 break; 4388 4389 case GAUDI_QUEUE_ID_TPC_6_1: 4390 db_reg_offset = mmTPC6_QM_PQ_PI_1; 4391 break; 4392 4393 case GAUDI_QUEUE_ID_TPC_6_2: 4394 db_reg_offset = mmTPC6_QM_PQ_PI_2; 4395 break; 4396 4397 case GAUDI_QUEUE_ID_TPC_6_3: 4398 db_reg_offset = mmTPC6_QM_PQ_PI_3; 4399 break; 4400 4401 case GAUDI_QUEUE_ID_TPC_7_0: 4402 db_reg_offset = mmTPC7_QM_PQ_PI_0; 4403 break; 4404 4405 case GAUDI_QUEUE_ID_TPC_7_1: 4406 db_reg_offset = mmTPC7_QM_PQ_PI_1; 4407 break; 4408 4409 case GAUDI_QUEUE_ID_TPC_7_2: 4410 db_reg_offset = mmTPC7_QM_PQ_PI_2; 4411 break; 4412 4413 case GAUDI_QUEUE_ID_TPC_7_3: 4414 db_reg_offset = mmTPC7_QM_PQ_PI_3; 4415 break; 4416 4417 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3: 4418 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0)) 4419 invalid_queue = true; 4420 4421 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4422 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off; 4423 break; 4424 4425 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3: 4426 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1)) 4427 invalid_queue = true; 4428 4429 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4430 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off; 4431 break; 4432 4433 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3: 4434 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2)) 4435 invalid_queue = true; 4436 4437 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4438 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off; 4439 break; 4440 4441 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3: 4442 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3)) 4443 invalid_queue = true; 4444 4445 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4446 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off; 4447 break; 4448 4449 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3: 4450 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4)) 4451 invalid_queue = true; 4452 4453 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4454 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off; 4455 break; 4456 4457 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3: 4458 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5)) 4459 invalid_queue = true; 4460 4461 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4462 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off; 4463 break; 4464 4465 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3: 4466 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6)) 4467 invalid_queue = true; 4468 4469 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4470 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off; 4471 break; 4472 4473 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3: 4474 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7)) 4475 invalid_queue = true; 4476 4477 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4478 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off; 4479 break; 4480 4481 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3: 4482 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8)) 4483 invalid_queue = true; 4484 4485 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4486 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off; 4487 break; 4488 4489 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3: 4490 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9)) 4491 invalid_queue = true; 4492 4493 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4494 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off; 4495 break; 4496 4497 default: 4498 
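		/* Unrecognized H/W queue ID - flag it; an error is reported below */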
invalid_queue = true; 4499 } 4500 4501 if (invalid_queue) { 4502 /* Should never get here */ 4503 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", 4504 hw_queue_id); 4505 return; 4506 } 4507 4508 db_value = pi; 4509 4510 /* ring the doorbell */ 4511 WREG32(db_reg_offset, db_value); 4512 4513 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { 4514 /* make sure device CPU will read latest data from host */ 4515 mb(); 4516 4517 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4518 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4519 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 4520 4521 WREG32(irq_handler_offset, 4522 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 4523 } 4524 } 4525 4526 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, 4527 struct hl_bd *bd) 4528 { 4529 __le64 *pbd = (__le64 *) bd; 4530 4531 /* The QMANs are on the host memory so a simple copy suffice */ 4532 pqe[0] = pbd[0]; 4533 pqe[1] = pbd[1]; 4534 } 4535 4536 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, 4537 dma_addr_t *dma_handle, gfp_t flags) 4538 { 4539 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, 4540 dma_handle, flags); 4541 4542 /* Shift to the device's base physical address of host memory */ 4543 if (kernel_addr) 4544 *dma_handle += HOST_PHYS_BASE; 4545 4546 return kernel_addr; 4547 } 4548 4549 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, 4550 void *cpu_addr, dma_addr_t dma_handle) 4551 { 4552 /* Cancel the device's base physical address of host memory */ 4553 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; 4554 4555 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); 4556 } 4557 4558 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) 4559 { 4560 struct asic_fixed_properties *prop = &hdev->asic_prop; 4561 u64 cur_addr = prop->dram_user_base_address; 4562 u32 chunk_size, busy; 4563 int rc, dma_id; 4564 4565 while (cur_addr < prop->dram_end_address) { 4566 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4567 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4568 4569 chunk_size = 4570 min((u64)SZ_2G, prop->dram_end_address - cur_addr); 4571 4572 dev_dbg(hdev->dev, 4573 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", 4574 cur_addr, cur_addr + chunk_size); 4575 4576 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 4577 lower_32_bits(val)); 4578 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 4579 upper_32_bits(val)); 4580 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, 4581 lower_32_bits(cur_addr)); 4582 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, 4583 upper_32_bits(cur_addr)); 4584 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, 4585 chunk_size); 4586 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 4587 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) | 4588 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT))); 4589 4590 cur_addr += chunk_size; 4591 4592 if (cur_addr == prop->dram_end_address) 4593 break; 4594 } 4595 4596 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4597 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4598 4599 rc = hl_poll_timeout( 4600 hdev, 4601 mmDMA0_CORE_STS0 + dma_offset, 4602 busy, 4603 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 4604 1000, 4605 HBM_SCRUBBING_TIMEOUT_US); 4606 4607 if (rc) { 4608 dev_err(hdev->dev, 4609 "DMA Timeout during HBM scrubbing of DMA #%d\n", 4610 dma_id); 4611 return -EIO; 4612 } 4613 } 4614 } 4615 4616 return 0; 4617 } 4618 4619 static int gaudi_scrub_device_mem(struct hl_device *hdev) 4620 { 4621 struct asic_fixed_properties *prop = 
&hdev->asic_prop; 4622 u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US : 4623 min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US); 4624 u64 addr, size, val = hdev->memory_scrub_val; 4625 ktime_t timeout; 4626 int rc = 0; 4627 4628 if (!hdev->memory_scrub) 4629 return 0; 4630 4631 timeout = ktime_add_us(ktime_get(), wait_to_idle_time); 4632 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 4633 if (ktime_compare(ktime_get(), timeout) > 0) { 4634 dev_err(hdev->dev, "waiting for idle timeout\n"); 4635 return -ETIMEDOUT; 4636 } 4637 usleep_range((1000 >> 2) + 1, 1000); 4638 } 4639 4640 /* Scrub SRAM */ 4641 addr = prop->sram_user_base_address; 4642 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; 4643 4644 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4645 addr, addr + size, val); 4646 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4647 if (rc) { 4648 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc); 4649 return rc; 4650 } 4651 4652 /* Scrub HBM using all DMA channels in parallel */ 4653 rc = gaudi_scrub_device_dram(hdev, val); 4654 if (rc) { 4655 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc); 4656 return rc; 4657 } 4658 4659 return 0; 4660 } 4661 4662 static void *gaudi_get_int_queue_base(struct hl_device *hdev, 4663 u32 queue_id, dma_addr_t *dma_handle, 4664 u16 *queue_len) 4665 { 4666 struct gaudi_device *gaudi = hdev->asic_specific; 4667 struct gaudi_internal_qman_info *q; 4668 4669 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 4670 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 4671 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 4672 return NULL; 4673 } 4674 4675 q = &gaudi->internal_qmans[queue_id]; 4676 *dma_handle = q->pq_dma_addr; 4677 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; 4678 4679 return q->pq_kernel_addr; 4680 } 4681 4682 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 4683 u16 len, u32 timeout, u64 *result) 4684 { 4685 struct gaudi_device *gaudi = hdev->asic_specific; 4686 4687 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 4688 if (result) 4689 *result = 0; 4690 return 0; 4691 } 4692 4693 if (!timeout) 4694 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 4695 4696 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 4697 timeout, result); 4698 } 4699 4700 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 4701 { 4702 struct packet_msg_prot *fence_pkt; 4703 dma_addr_t pkt_dma_addr; 4704 u32 fence_val, tmp, timeout_usec; 4705 dma_addr_t fence_dma_addr; 4706 u32 *fence_ptr; 4707 int rc; 4708 4709 if (hdev->pldm) 4710 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 4711 else 4712 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 4713 4714 fence_val = GAUDI_QMAN0_FENCE_VAL; 4715 4716 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 4717 if (!fence_ptr) { 4718 dev_err(hdev->dev, 4719 "Failed to allocate memory for H/W queue %d testing\n", 4720 hw_queue_id); 4721 return -ENOMEM; 4722 } 4723 4724 *fence_ptr = 0; 4725 4726 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL, 4727 &pkt_dma_addr); 4728 if (!fence_pkt) { 4729 dev_err(hdev->dev, 4730 "Failed to allocate packet for H/W queue %d testing\n", 4731 hw_queue_id); 4732 rc = -ENOMEM; 4733 goto free_fence_ptr; 4734 } 4735 4736 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4737 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4738 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4739 4740 fence_pkt->ctl 
= cpu_to_le32(tmp); 4741 fence_pkt->value = cpu_to_le32(fence_val); 4742 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4743 4744 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 4745 sizeof(struct packet_msg_prot), 4746 pkt_dma_addr); 4747 if (rc) { 4748 dev_err(hdev->dev, 4749 "Failed to send fence packet to H/W queue %d\n", 4750 hw_queue_id); 4751 goto free_pkt; 4752 } 4753 4754 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 4755 1000, timeout_usec, true); 4756 4757 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 4758 4759 if (rc == -ETIMEDOUT) { 4760 dev_err(hdev->dev, 4761 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 4762 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 4763 rc = -EIO; 4764 } 4765 4766 free_pkt: 4767 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr); 4768 free_fence_ptr: 4769 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 4770 return rc; 4771 } 4772 4773 static int gaudi_test_cpu_queue(struct hl_device *hdev) 4774 { 4775 struct gaudi_device *gaudi = hdev->asic_specific; 4776 4777 /* 4778 * check capability here as send_cpu_message() won't update the result 4779 * value if no capability 4780 */ 4781 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 4782 return 0; 4783 4784 return hl_fw_test_cpu_queue(hdev); 4785 } 4786 4787 static int gaudi_test_queues(struct hl_device *hdev) 4788 { 4789 int i, rc, ret_val = 0; 4790 4791 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 4792 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 4793 rc = gaudi_test_queue(hdev, i); 4794 if (rc) 4795 ret_val = -EINVAL; 4796 } 4797 } 4798 4799 rc = gaudi_test_cpu_queue(hdev); 4800 if (rc) 4801 ret_val = -EINVAL; 4802 4803 return ret_val; 4804 } 4805 4806 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 4807 gfp_t mem_flags, dma_addr_t *dma_handle) 4808 { 4809 void *kernel_addr; 4810 4811 if (size > GAUDI_DMA_POOL_BLK_SIZE) 4812 return NULL; 4813 4814 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 4815 4816 /* Shift to the device's base physical address of host memory */ 4817 if (kernel_addr) 4818 *dma_handle += HOST_PHYS_BASE; 4819 4820 return kernel_addr; 4821 } 4822 4823 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 4824 dma_addr_t dma_addr) 4825 { 4826 /* Cancel the device's base physical address of host memory */ 4827 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 4828 4829 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 4830 } 4831 4832 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 4833 size_t size, dma_addr_t *dma_handle) 4834 { 4835 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 4836 } 4837 4838 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 4839 size_t size, void *vaddr) 4840 { 4841 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 4842 } 4843 4844 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) 4845 { 4846 struct scatterlist *sg, *sg_next_iter; 4847 u32 count, dma_desc_cnt; 4848 u64 len, len_next; 4849 dma_addr_t addr, addr_next; 4850 4851 dma_desc_cnt = 0; 4852 4853 for_each_sgtable_dma_sg(sgt, sg, count) { 4854 len = sg_dma_len(sg); 4855 addr = sg_dma_address(sg); 4856 4857 if (len == 0) 4858 break; 4859 4860 while ((count + 1) < sgt->nents) { 4861 sg_next_iter = sg_next(sg); 4862 len_next = sg_dma_len(sg_next_iter); 4863 addr_next = sg_dma_address(sg_next_iter); 4864 4865 if (len_next == 0) 4866 
break; 4867 4868 if ((addr + len == addr_next) && 4869 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 4870 len += len_next; 4871 count++; 4872 sg = sg_next_iter; 4873 } else { 4874 break; 4875 } 4876 } 4877 4878 dma_desc_cnt++; 4879 } 4880 4881 return dma_desc_cnt * sizeof(struct packet_lin_dma); 4882 } 4883 4884 static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 4885 struct hl_cs_parser *parser, 4886 struct packet_lin_dma *user_dma_pkt, 4887 u64 addr, enum dma_data_direction dir) 4888 { 4889 struct hl_userptr *userptr; 4890 int rc; 4891 4892 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4893 parser->job_userptr_list, &userptr)) 4894 goto already_pinned; 4895 4896 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 4897 if (!userptr) 4898 return -ENOMEM; 4899 4900 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4901 userptr); 4902 if (rc) 4903 goto free_userptr; 4904 4905 list_add_tail(&userptr->job_node, parser->job_userptr_list); 4906 4907 rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); 4908 if (rc) { 4909 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 4910 goto unpin_memory; 4911 } 4912 4913 userptr->dma_mapped = true; 4914 userptr->dir = dir; 4915 4916 already_pinned: 4917 parser->patched_cb_size += 4918 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 4919 4920 return 0; 4921 4922 unpin_memory: 4923 list_del(&userptr->job_node); 4924 hl_unpin_host_memory(hdev, userptr); 4925 free_userptr: 4926 kfree(userptr); 4927 return rc; 4928 } 4929 4930 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, 4931 struct hl_cs_parser *parser, 4932 struct packet_lin_dma *user_dma_pkt, 4933 bool src_in_host) 4934 { 4935 enum dma_data_direction dir; 4936 bool skip_host_mem_pin = false, user_memset; 4937 u64 addr; 4938 int rc = 0; 4939 4940 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 4941 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 4942 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 4943 4944 if (src_in_host) { 4945 if (user_memset) 4946 skip_host_mem_pin = true; 4947 4948 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 4949 dir = DMA_TO_DEVICE; 4950 addr = le64_to_cpu(user_dma_pkt->src_addr); 4951 } else { 4952 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 4953 dir = DMA_FROM_DEVICE; 4954 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4955 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4956 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4957 } 4958 4959 if (skip_host_mem_pin) 4960 parser->patched_cb_size += sizeof(*user_dma_pkt); 4961 else 4962 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 4963 addr, dir); 4964 4965 return rc; 4966 } 4967 4968 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 4969 struct hl_cs_parser *parser, 4970 struct packet_lin_dma *user_dma_pkt) 4971 { 4972 bool src_in_host = false; 4973 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4974 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4975 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4976 4977 dev_dbg(hdev->dev, "DMA packet details:\n"); 4978 dev_dbg(hdev->dev, "source == 0x%llx\n", 4979 le64_to_cpu(user_dma_pkt->src_addr)); 4980 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 4981 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); 4982 4983 /* 4984 * Special handling for DMA with size 0. 
Bypass all validations 4985 * because no transactions will be done except for WR_COMP, which 4986 * is not a security issue 4987 */ 4988 if (!le32_to_cpu(user_dma_pkt->tsize)) { 4989 parser->patched_cb_size += sizeof(*user_dma_pkt); 4990 return 0; 4991 } 4992 4993 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 4994 src_in_host = true; 4995 4996 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 4997 src_in_host); 4998 } 4999 5000 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 5001 struct hl_cs_parser *parser, 5002 struct packet_load_and_exe *user_pkt) 5003 { 5004 u32 cfg; 5005 5006 cfg = le32_to_cpu(user_pkt->cfg); 5007 5008 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 5009 dev_err(hdev->dev, 5010 "User not allowed to use Load and Execute\n"); 5011 return -EPERM; 5012 } 5013 5014 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 5015 5016 return 0; 5017 } 5018 5019 static int gaudi_validate_cb(struct hl_device *hdev, 5020 struct hl_cs_parser *parser, bool is_mmu) 5021 { 5022 u32 cb_parsed_length = 0; 5023 int rc = 0; 5024 5025 parser->patched_cb_size = 0; 5026 5027 /* cb_user_size is more than 0 so loop will always be executed */ 5028 while (cb_parsed_length < parser->user_cb_size) { 5029 enum packet_id pkt_id; 5030 u16 pkt_size; 5031 struct gaudi_packet *user_pkt; 5032 5033 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5034 5035 pkt_id = (enum packet_id) ( 5036 (le64_to_cpu(user_pkt->header) & 5037 PACKET_HEADER_PACKET_ID_MASK) >> 5038 PACKET_HEADER_PACKET_ID_SHIFT); 5039 5040 if (!validate_packet_id(pkt_id)) { 5041 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5042 rc = -EINVAL; 5043 break; 5044 } 5045 5046 pkt_size = gaudi_packet_sizes[pkt_id]; 5047 cb_parsed_length += pkt_size; 5048 if (cb_parsed_length > parser->user_cb_size) { 5049 dev_err(hdev->dev, 5050 "packet 0x%x is out of CB boundary\n", pkt_id); 5051 rc = -EINVAL; 5052 break; 5053 } 5054 5055 switch (pkt_id) { 5056 case PACKET_MSG_PROT: 5057 dev_err(hdev->dev, 5058 "User not allowed to use MSG_PROT\n"); 5059 rc = -EPERM; 5060 break; 5061 5062 case PACKET_CP_DMA: 5063 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5064 rc = -EPERM; 5065 break; 5066 5067 case PACKET_STOP: 5068 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5069 rc = -EPERM; 5070 break; 5071 5072 case PACKET_WREG_BULK: 5073 dev_err(hdev->dev, 5074 "User not allowed to use WREG_BULK\n"); 5075 rc = -EPERM; 5076 break; 5077 5078 case PACKET_LOAD_AND_EXE: 5079 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 5080 (struct packet_load_and_exe *) user_pkt); 5081 break; 5082 5083 case PACKET_LIN_DMA: 5084 parser->contains_dma_pkt = true; 5085 if (is_mmu) 5086 parser->patched_cb_size += pkt_size; 5087 else 5088 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 5089 (struct packet_lin_dma *) user_pkt); 5090 break; 5091 5092 case PACKET_WREG_32: 5093 case PACKET_MSG_LONG: 5094 case PACKET_MSG_SHORT: 5095 case PACKET_REPEAT: 5096 case PACKET_FENCE: 5097 case PACKET_NOP: 5098 case PACKET_ARB_POINT: 5099 parser->patched_cb_size += pkt_size; 5100 break; 5101 5102 default: 5103 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5104 pkt_id); 5105 rc = -EINVAL; 5106 break; 5107 } 5108 5109 if (rc) 5110 break; 5111 } 5112 5113 /* 5114 * The new CB should have space at the end for two MSG_PROT packets: 5115 * 1. Optional NOP padding for cacheline alignment 5116 * 2. A packet that will act as a completion packet 5117 * 3. 
A packet that will generate MSI interrupt 5118 */ 5119 if (parser->completion) 5120 parser->patched_cb_size += gaudi_get_patched_cb_extra_size( 5121 parser->patched_cb_size); 5122 5123 return rc; 5124 } 5125 5126 static int gaudi_patch_dma_packet(struct hl_device *hdev, 5127 struct hl_cs_parser *parser, 5128 struct packet_lin_dma *user_dma_pkt, 5129 struct packet_lin_dma *new_dma_pkt, 5130 u32 *new_dma_pkt_size) 5131 { 5132 struct hl_userptr *userptr; 5133 struct scatterlist *sg, *sg_next_iter; 5134 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 5135 u64 len, len_next; 5136 dma_addr_t dma_addr, dma_addr_next; 5137 u64 device_memory_addr, addr; 5138 enum dma_data_direction dir; 5139 struct sg_table *sgt; 5140 bool src_in_host = false; 5141 bool skip_host_mem_pin = false; 5142 bool user_memset; 5143 5144 ctl = le32_to_cpu(user_dma_pkt->ctl); 5145 5146 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5147 src_in_host = true; 5148 5149 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5150 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5151 5152 if (src_in_host) { 5153 addr = le64_to_cpu(user_dma_pkt->src_addr); 5154 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 5155 dir = DMA_TO_DEVICE; 5156 if (user_memset) 5157 skip_host_mem_pin = true; 5158 } else { 5159 addr = le64_to_cpu(user_dma_pkt->dst_addr); 5160 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 5161 dir = DMA_FROM_DEVICE; 5162 } 5163 5164 if ((!skip_host_mem_pin) && 5165 (!hl_userptr_is_pinned(hdev, addr, 5166 le32_to_cpu(user_dma_pkt->tsize), 5167 parser->job_userptr_list, &userptr))) { 5168 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 5169 addr, user_dma_pkt->tsize); 5170 return -EFAULT; 5171 } 5172 5173 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 5174 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 5175 *new_dma_pkt_size = sizeof(*user_dma_pkt); 5176 return 0; 5177 } 5178 5179 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5180 5181 sgt = userptr->sgt; 5182 dma_desc_cnt = 0; 5183 5184 for_each_sgtable_dma_sg(sgt, sg, count) { 5185 len = sg_dma_len(sg); 5186 dma_addr = sg_dma_address(sg); 5187 5188 if (len == 0) 5189 break; 5190 5191 while ((count + 1) < sgt->nents) { 5192 sg_next_iter = sg_next(sg); 5193 len_next = sg_dma_len(sg_next_iter); 5194 dma_addr_next = sg_dma_address(sg_next_iter); 5195 5196 if (len_next == 0) 5197 break; 5198 5199 if ((dma_addr + len == dma_addr_next) && 5200 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5201 len += len_next; 5202 count++; 5203 sg = sg_next_iter; 5204 } else { 5205 break; 5206 } 5207 } 5208 5209 ctl = le32_to_cpu(user_dma_pkt->ctl); 5210 if (likely(dma_desc_cnt)) 5211 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 5212 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5213 new_dma_pkt->ctl = cpu_to_le32(ctl); 5214 new_dma_pkt->tsize = cpu_to_le32(len); 5215 5216 if (dir == DMA_TO_DEVICE) { 5217 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 5218 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 5219 } else { 5220 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 5221 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 5222 } 5223 5224 if (!user_memset) 5225 device_memory_addr += len; 5226 dma_desc_cnt++; 5227 new_dma_pkt++; 5228 } 5229 5230 if (!dma_desc_cnt) { 5231 dev_err(hdev->dev, 5232 "Error of 0 SG entries when patching DMA packet\n"); 5233 return -EFAULT; 5234 } 5235 5236 /* Fix the last dma packet - wrcomp must be as user set it */ 5237 new_dma_pkt--; 5238 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 5239 5240 
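	/*
	 * Report the total size of the patched LIN_DMA packets; the caller
	 * (gaudi_patch_cb) advances the patched CB by this amount via
	 * cb_patched_cur_length += new_pkt_size.
	 */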
*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); 5241 5242 return 0; 5243 } 5244 5245 static int gaudi_patch_cb(struct hl_device *hdev, 5246 struct hl_cs_parser *parser) 5247 { 5248 u32 cb_parsed_length = 0; 5249 u32 cb_patched_cur_length = 0; 5250 int rc = 0; 5251 5252 /* cb_user_size is more than 0 so loop will always be executed */ 5253 while (cb_parsed_length < parser->user_cb_size) { 5254 enum packet_id pkt_id; 5255 u16 pkt_size; 5256 u32 new_pkt_size = 0; 5257 struct gaudi_packet *user_pkt, *kernel_pkt; 5258 5259 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5260 kernel_pkt = parser->patched_cb->kernel_address + 5261 cb_patched_cur_length; 5262 5263 pkt_id = (enum packet_id) ( 5264 (le64_to_cpu(user_pkt->header) & 5265 PACKET_HEADER_PACKET_ID_MASK) >> 5266 PACKET_HEADER_PACKET_ID_SHIFT); 5267 5268 if (!validate_packet_id(pkt_id)) { 5269 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5270 rc = -EINVAL; 5271 break; 5272 } 5273 5274 pkt_size = gaudi_packet_sizes[pkt_id]; 5275 cb_parsed_length += pkt_size; 5276 if (cb_parsed_length > parser->user_cb_size) { 5277 dev_err(hdev->dev, 5278 "packet 0x%x is out of CB boundary\n", pkt_id); 5279 rc = -EINVAL; 5280 break; 5281 } 5282 5283 switch (pkt_id) { 5284 case PACKET_LIN_DMA: 5285 rc = gaudi_patch_dma_packet(hdev, parser, 5286 (struct packet_lin_dma *) user_pkt, 5287 (struct packet_lin_dma *) kernel_pkt, 5288 &new_pkt_size); 5289 cb_patched_cur_length += new_pkt_size; 5290 break; 5291 5292 case PACKET_MSG_PROT: 5293 dev_err(hdev->dev, 5294 "User not allowed to use MSG_PROT\n"); 5295 rc = -EPERM; 5296 break; 5297 5298 case PACKET_CP_DMA: 5299 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5300 rc = -EPERM; 5301 break; 5302 5303 case PACKET_STOP: 5304 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5305 rc = -EPERM; 5306 break; 5307 5308 case PACKET_WREG_32: 5309 case PACKET_WREG_BULK: 5310 case PACKET_MSG_LONG: 5311 case PACKET_MSG_SHORT: 5312 case PACKET_REPEAT: 5313 case PACKET_FENCE: 5314 case PACKET_NOP: 5315 case PACKET_ARB_POINT: 5316 case PACKET_LOAD_AND_EXE: 5317 memcpy(kernel_pkt, user_pkt, pkt_size); 5318 cb_patched_cur_length += pkt_size; 5319 break; 5320 5321 default: 5322 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5323 pkt_id); 5324 rc = -EINVAL; 5325 break; 5326 } 5327 5328 if (rc) 5329 break; 5330 } 5331 5332 return rc; 5333 } 5334 5335 static int gaudi_parse_cb_mmu(struct hl_device *hdev, 5336 struct hl_cs_parser *parser) 5337 { 5338 u64 handle; 5339 u32 patched_cb_size; 5340 struct hl_cb *user_cb; 5341 int rc; 5342 5343 /* 5344 * The new CB should have space at the end for two MSG_PROT packets: 5345 * 1. Optional NOP padding for cacheline alignment 5346 * 2. A packet that will act as a completion packet 5347 * 3. 
A packet that will generate MSI interrupt 5348 */ 5349 if (parser->completion) 5350 parser->patched_cb_size = parser->user_cb_size + 5351 gaudi_get_patched_cb_extra_size(parser->user_cb_size); 5352 else 5353 parser->patched_cb_size = parser->user_cb_size; 5354 5355 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5356 parser->patched_cb_size, false, false, 5357 &handle); 5358 5359 if (rc) { 5360 dev_err(hdev->dev, 5361 "Failed to allocate patched CB for DMA CS %d\n", 5362 rc); 5363 return rc; 5364 } 5365 5366 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5367 /* hl_cb_get should never fail */ 5368 if (!parser->patched_cb) { 5369 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5370 rc = -EFAULT; 5371 goto out; 5372 } 5373 5374 /* 5375 * We are protected from overflow because the check 5376 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk() 5377 * in the common code. That check is done only if is_kernel_allocated_cb is true. 5378 * 5379 * There is no option to reach here without going through that check because: 5380 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to 5381 * an external queue. 5382 * 2. For Gaudi, we only parse CBs that were submitted to the external queues. 5383 */ 5384 memcpy(parser->patched_cb->kernel_address, 5385 parser->user_cb->kernel_address, 5386 parser->user_cb_size); 5387 5388 patched_cb_size = parser->patched_cb_size; 5389 5390 /* Validate patched CB instead of user CB */ 5391 user_cb = parser->user_cb; 5392 parser->user_cb = parser->patched_cb; 5393 rc = gaudi_validate_cb(hdev, parser, true); 5394 parser->user_cb = user_cb; 5395 5396 if (rc) { 5397 hl_cb_put(parser->patched_cb); 5398 goto out; 5399 } 5400 5401 if (patched_cb_size != parser->patched_cb_size) { 5402 dev_err(hdev->dev, "user CB size mismatch\n"); 5403 hl_cb_put(parser->patched_cb); 5404 rc = -EINVAL; 5405 goto out; 5406 } 5407 5408 out: 5409 /* 5410 * Always call cb destroy here because we still have 1 reference 5411 * to it by calling cb_get earlier. After the job will be completed, 5412 * cb_put will release it, but here we want to remove it from the 5413 * idr 5414 */ 5415 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5416 5417 return rc; 5418 } 5419 5420 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 5421 struct hl_cs_parser *parser) 5422 { 5423 u64 handle; 5424 int rc; 5425 5426 rc = gaudi_validate_cb(hdev, parser, false); 5427 5428 if (rc) 5429 goto free_userptr; 5430 5431 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5432 parser->patched_cb_size, false, false, 5433 &handle); 5434 if (rc) { 5435 dev_err(hdev->dev, 5436 "Failed to allocate patched CB for DMA CS %d\n", rc); 5437 goto free_userptr; 5438 } 5439 5440 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5441 /* hl_cb_get should never fail here */ 5442 if (!parser->patched_cb) { 5443 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5444 rc = -EFAULT; 5445 goto out; 5446 } 5447 5448 rc = gaudi_patch_cb(hdev, parser); 5449 5450 if (rc) 5451 hl_cb_put(parser->patched_cb); 5452 5453 out: 5454 /* 5455 * Always call cb destroy here because we still have 1 reference 5456 * to it by calling cb_get earlier. 
After the job will be completed, 5457 * cb_put will release it, but here we want to remove it from the 5458 * idr 5459 */ 5460 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5461 5462 free_userptr: 5463 if (rc) 5464 hl_userptr_delete_list(hdev, parser->job_userptr_list); 5465 return rc; 5466 } 5467 5468 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 5469 struct hl_cs_parser *parser) 5470 { 5471 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5472 struct gaudi_device *gaudi = hdev->asic_specific; 5473 u32 nic_queue_offset, nic_mask_q_id; 5474 5475 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) && 5476 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) { 5477 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0; 5478 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2)); 5479 5480 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) { 5481 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5482 return -EINVAL; 5483 } 5484 } 5485 5486 /* For internal queue jobs just check if CB address is valid */ 5487 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5488 parser->user_cb_size, 5489 asic_prop->sram_user_base_address, 5490 asic_prop->sram_end_address)) 5491 return 0; 5492 5493 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5494 parser->user_cb_size, 5495 asic_prop->dram_user_base_address, 5496 asic_prop->dram_end_address)) 5497 return 0; 5498 5499 /* PMMU and HPMMU addresses are equal, check only one of them */ 5500 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5501 parser->user_cb_size, 5502 asic_prop->pmmu.start_addr, 5503 asic_prop->pmmu.end_addr)) 5504 return 0; 5505 5506 dev_err(hdev->dev, 5507 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 5508 parser->user_cb, parser->user_cb_size); 5509 5510 return -EFAULT; 5511 } 5512 5513 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5514 { 5515 struct gaudi_device *gaudi = hdev->asic_specific; 5516 5517 if (parser->queue_type == QUEUE_TYPE_INT) 5518 return gaudi_parse_cb_no_ext_queue(hdev, parser); 5519 5520 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 5521 return gaudi_parse_cb_mmu(hdev, parser); 5522 else 5523 return gaudi_parse_cb_no_mmu(hdev, parser); 5524 } 5525 5526 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, 5527 u32 len, u32 original_len, u64 cq_addr, u32 cq_val, 5528 u32 msi_vec, bool eb) 5529 { 5530 struct packet_msg_prot *cq_pkt; 5531 struct packet_nop *cq_padding; 5532 u64 msi_addr; 5533 u32 tmp; 5534 5535 cq_padding = kernel_address + original_len; 5536 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 5537 5538 while ((void *)cq_padding < (void *)cq_pkt) { 5539 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP)); 5540 cq_padding++; 5541 } 5542 5543 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5544 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5545 5546 if (eb) 5547 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5548 5549 cq_pkt->ctl = cpu_to_le32(tmp); 5550 cq_pkt->value = cpu_to_le32(cq_val); 5551 cq_pkt->addr = cpu_to_le64(cq_addr); 5552 5553 cq_pkt++; 5554 5555 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5556 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5557 cq_pkt->ctl = cpu_to_le32(tmp); 5558 cq_pkt->value = cpu_to_le32(1); 5559 msi_addr = hdev->pdev ? 
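	/*
	 * Clarifying assumption (not in the original comment): when a real
	 * PCI device is present, the MSI request register is the interrupt
	 * target; otherwise (e.g. simulation platforms with no pdev) the
	 * per-vector PCIE_MSI_INTR_0 register, indexed by msi_vec, is used.
	 */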
mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4; 5560 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); 5561 } 5562 5563 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 5564 { 5565 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 5566 } 5567 5568 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 5569 u32 size, u64 val) 5570 { 5571 struct packet_lin_dma *lin_dma_pkt; 5572 struct hl_cs_job *job; 5573 u32 cb_size, ctl, err_cause; 5574 struct hl_cb *cb; 5575 int rc; 5576 5577 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 5578 if (!cb) 5579 return -EFAULT; 5580 5581 lin_dma_pkt = cb->kernel_address; 5582 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 5583 cb_size = sizeof(*lin_dma_pkt); 5584 5585 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 5586 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 5587 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 5588 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5589 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5590 5591 lin_dma_pkt->ctl = cpu_to_le32(ctl); 5592 lin_dma_pkt->src_addr = cpu_to_le64(val); 5593 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 5594 lin_dma_pkt->tsize = cpu_to_le32(size); 5595 5596 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5597 if (!job) { 5598 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5599 rc = -ENOMEM; 5600 goto release_cb; 5601 } 5602 5603 /* Verify DMA is OK */ 5604 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5605 if (err_cause && !hdev->init_done) { 5606 dev_dbg(hdev->dev, 5607 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5608 err_cause); 5609 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5610 } 5611 5612 job->id = 0; 5613 job->user_cb = cb; 5614 atomic_inc(&job->user_cb->cs_cnt); 5615 job->user_cb_size = cb_size; 5616 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5617 job->patched_cb = job->user_cb; 5618 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 5619 5620 hl_debugfs_add_job(hdev, job); 5621 5622 rc = gaudi_send_job_on_qman0(hdev, job); 5623 hl_debugfs_remove_job(hdev, job); 5624 kfree(job); 5625 atomic_dec(&cb->cs_cnt); 5626 5627 /* Verify DMA is OK */ 5628 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5629 if (err_cause) { 5630 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5631 rc = -EIO; 5632 if (!hdev->init_done) { 5633 dev_dbg(hdev->dev, 5634 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5635 err_cause); 5636 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5637 } 5638 } 5639 5640 release_cb: 5641 hl_cb_put(cb); 5642 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5643 5644 return rc; 5645 } 5646 5647 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 5648 u32 num_regs, u32 val) 5649 { 5650 struct packet_msg_long *pkt; 5651 struct hl_cs_job *job; 5652 u32 cb_size, ctl; 5653 struct hl_cb *cb; 5654 int i, rc; 5655 5656 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot); 5657 5658 if (cb_size > SZ_2M) { 5659 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M); 5660 return -ENOMEM; 5661 } 5662 5663 cb = hl_cb_kernel_create(hdev, cb_size, false); 5664 if (!cb) 5665 return -EFAULT; 5666 5667 pkt = cb->kernel_address; 5668 5669 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ 5670 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); 5671 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5672 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5673 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5674 5675 for (i = 0; i < num_regs ; i++, pkt++) { 
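		/*
		 * Clarifying note (not from the original source): each iteration
		 * emits one MSG_LONG packet that writes 'val' to the next 4-byte
		 * register, i.e. reg_base, reg_base + 4, reg_base + 8, and so on.
		 * The trailing MSG_PROT slot reserved in cb_size is later filled
		 * as a fence packet by gaudi_send_job_on_qman0().
		 */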
5676 pkt->ctl = cpu_to_le32(ctl); 5677 pkt->value = cpu_to_le32(val); 5678 pkt->addr = cpu_to_le64(reg_base + (i * 4)); 5679 } 5680 5681 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5682 if (!job) { 5683 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5684 rc = -ENOMEM; 5685 goto release_cb; 5686 } 5687 5688 job->id = 0; 5689 job->user_cb = cb; 5690 atomic_inc(&job->user_cb->cs_cnt); 5691 job->user_cb_size = cb_size; 5692 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5693 job->patched_cb = job->user_cb; 5694 job->job_cb_size = cb_size; 5695 5696 hl_debugfs_add_job(hdev, job); 5697 5698 rc = gaudi_send_job_on_qman0(hdev, job); 5699 hl_debugfs_remove_job(hdev, job); 5700 kfree(job); 5701 atomic_dec(&cb->cs_cnt); 5702 5703 release_cb: 5704 hl_cb_put(cb); 5705 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5706 5707 return rc; 5708 } 5709 5710 static int gaudi_restore_sm_registers(struct hl_device *hdev) 5711 { 5712 u64 base_addr; 5713 u32 num_regs; 5714 int rc; 5715 5716 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5717 num_regs = NUM_OF_SOB_IN_BLOCK; 5718 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5719 if (rc) { 5720 dev_err(hdev->dev, "failed resetting SM registers"); 5721 return -ENOMEM; 5722 } 5723 5724 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0; 5725 num_regs = NUM_OF_SOB_IN_BLOCK; 5726 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5727 if (rc) { 5728 dev_err(hdev->dev, "failed resetting SM registers"); 5729 return -ENOMEM; 5730 } 5731 5732 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5733 num_regs = NUM_OF_SOB_IN_BLOCK; 5734 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5735 if (rc) { 5736 dev_err(hdev->dev, "failed resetting SM registers"); 5737 return -ENOMEM; 5738 } 5739 5740 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5741 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5742 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5743 if (rc) { 5744 dev_err(hdev->dev, "failed resetting SM registers"); 5745 return -ENOMEM; 5746 } 5747 5748 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0; 5749 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5750 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5751 if (rc) { 5752 dev_err(hdev->dev, "failed resetting SM registers"); 5753 return -ENOMEM; 5754 } 5755 5756 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5757 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5758 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5759 if (rc) { 5760 dev_err(hdev->dev, "failed resetting SM registers"); 5761 return -ENOMEM; 5762 } 5763 5764 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5765 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4); 5766 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT; 5767 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5768 if (rc) { 5769 dev_err(hdev->dev, "failed resetting SM registers"); 5770 return -ENOMEM; 5771 } 5772 5773 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + 5774 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4); 5775 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR; 5776 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5777 if (rc) { 5778 dev_err(hdev->dev, "failed resetting SM registers"); 5779 return -ENOMEM; 5780 } 5781 5782 return 0; 5783 } 5784 5785 static void gaudi_restore_dma_registers(struct hl_device *hdev) 5786 { 5787 u32 sob_delta = 
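	/* presumably the 4-byte address stride between consecutive SOB registers */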
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 5788 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5789 int i; 5790 5791 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5792 u64 sob_addr = CFG_BASE + 5793 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5794 (i * sob_delta); 5795 u32 dma_offset = i * DMA_CORE_OFFSET; 5796 5797 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, 5798 lower_32_bits(sob_addr)); 5799 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 5800 upper_32_bits(sob_addr)); 5801 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 5802 5803 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 5804 * modified by the user for SRAM reduction 5805 */ 5806 if (i > 1) 5807 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 5808 0x00000001); 5809 } 5810 } 5811 5812 static void gaudi_restore_qm_registers(struct hl_device *hdev) 5813 { 5814 u32 qman_offset; 5815 int i; 5816 5817 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5818 qman_offset = i * DMA_QMAN_OFFSET; 5819 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 5820 } 5821 5822 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 5823 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 5824 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 5825 } 5826 5827 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 5828 qman_offset = i * TPC_QMAN_OFFSET; 5829 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 5830 } 5831 5832 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 5833 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET + 5834 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET; 5835 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0); 5836 } 5837 } 5838 5839 static int gaudi_restore_user_registers(struct hl_device *hdev) 5840 { 5841 int rc; 5842 5843 rc = gaudi_restore_sm_registers(hdev); 5844 if (rc) 5845 return rc; 5846 5847 gaudi_restore_dma_registers(hdev); 5848 gaudi_restore_qm_registers(hdev); 5849 5850 return 0; 5851 } 5852 5853 static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 5854 { 5855 return 0; 5856 } 5857 5858 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 5859 { 5860 u32 size = hdev->asic_prop.mmu_pgt_size + 5861 hdev->asic_prop.mmu_cache_mng_size; 5862 struct gaudi_device *gaudi = hdev->asic_specific; 5863 u64 addr = hdev->asic_prop.mmu_pgt_addr; 5864 5865 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 5866 return 0; 5867 5868 return gaudi_memset_device_memory(hdev, addr, size, 0); 5869 } 5870 5871 static void gaudi_restore_phase_topology(struct hl_device *hdev) 5872 { 5873 5874 } 5875 5876 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, 5877 u32 size_to_dma, dma_addr_t dma_addr) 5878 { 5879 u32 err_cause, val; 5880 u64 dma_offset; 5881 int rc; 5882 5883 dma_offset = dma_id * DMA_CORE_OFFSET; 5884 5885 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); 5886 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); 5887 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); 5888 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); 5889 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); 5890 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 5891 (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); 5892 5893 rc = hl_poll_timeout( 5894 hdev, 5895 mmDMA0_CORE_STS0 + dma_offset, 5896 val, 5897 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), 5898 0, 5899 1000000); 5900 5901 if (rc) { 5902 dev_err(hdev->dev, 5903 "DMA %d timed-out during reading of 0x%llx\n", 5904 dma_id, addr); 5905 return -EIO; 5906 } 5907 5908 /* Verify DMA is OK */ 5909 err_cause = 
RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5910 if (err_cause) { 5911 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5912 dev_dbg(hdev->dev, 5913 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5914 err_cause); 5915 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5916 5917 return -EIO; 5918 } 5919 5920 return 0; 5921 } 5922 5923 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, 5924 void *blob_addr) 5925 { 5926 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; 5927 u32 qm_glbl_sts0, qm_cgm_sts; 5928 u64 dma_offset, qm_offset; 5929 dma_addr_t dma_addr; 5930 void *kernel_addr; 5931 bool is_eng_idle; 5932 int rc = 0, dma_id; 5933 5934 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO); 5935 5936 if (!kernel_addr) 5937 return -ENOMEM; 5938 5939 hdev->asic_funcs->hw_queues_lock(hdev); 5940 5941 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 5942 dma_offset = dma_id * DMA_CORE_OFFSET; 5943 qm_offset = dma_id * DMA_QMAN_OFFSET; 5944 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5945 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5946 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5947 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5948 IS_DMA_IDLE(dma_core_sts0); 5949 5950 if (!is_eng_idle) { 5951 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 5952 dma_offset = dma_id * DMA_CORE_OFFSET; 5953 qm_offset = dma_id * DMA_QMAN_OFFSET; 5954 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5955 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5956 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5957 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5958 IS_DMA_IDLE(dma_core_sts0); 5959 5960 if (!is_eng_idle) { 5961 dev_err_ratelimited(hdev->dev, 5962 "Can't read via DMA because it is BUSY\n"); 5963 rc = -EAGAIN; 5964 goto out; 5965 } 5966 } 5967 5968 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); 5969 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, 5970 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 5971 5972 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 5973 * using the compute ctx ASID, if exists. If not, use the kernel ctx 5974 * ASID 5975 */ 5976 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 5977 5978 /* Verify DMA is OK */ 5979 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5980 if (err_cause) { 5981 dev_dbg(hdev->dev, 5982 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5983 err_cause); 5984 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5985 } 5986 5987 pos = 0; 5988 size_left = size; 5989 size_to_dma = SZ_2M; 5990 5991 while (size_left > 0) { 5992 5993 if (size_left < SZ_2M) 5994 size_to_dma = size_left; 5995 5996 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, 5997 dma_addr); 5998 if (rc) 5999 break; 6000 6001 memcpy(blob_addr + pos, kernel_addr, size_to_dma); 6002 6003 if (size_left <= SZ_2M) 6004 break; 6005 6006 pos += SZ_2M; 6007 addr += SZ_2M; 6008 size_left -= SZ_2M; 6009 } 6010 6011 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6012 * using the compute ctx ASID, if exists. 
If not, use the kernel ctx 6013 * ASID 6014 */ 6015 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 6016 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6017 6018 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); 6019 6020 out: 6021 hdev->asic_funcs->hw_queues_unlock(hdev); 6022 6023 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); 6024 6025 return rc; 6026 } 6027 6028 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 6029 { 6030 struct gaudi_device *gaudi = hdev->asic_specific; 6031 6032 if (hdev->reset_info.hard_reset_pending) 6033 return U64_MAX; 6034 6035 return readq(hdev->pcie_bar[HBM_BAR_ID] + 6036 (addr - gaudi->hbm_bar_cur_addr)); 6037 } 6038 6039 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 6040 { 6041 struct gaudi_device *gaudi = hdev->asic_specific; 6042 6043 if (hdev->reset_info.hard_reset_pending) 6044 return; 6045 6046 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 6047 (addr - gaudi->hbm_bar_cur_addr)); 6048 } 6049 6050 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 6051 { 6052 /* mask to zero the MMBP and ASID bits */ 6053 WREG32_AND(reg, ~0x7FF); 6054 WREG32_OR(reg, asid); 6055 } 6056 6057 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 6058 { 6059 struct gaudi_device *gaudi = hdev->asic_specific; 6060 6061 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6062 return; 6063 6064 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 6065 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6066 return; 6067 } 6068 6069 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6070 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6071 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6072 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6073 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6074 6075 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6076 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6077 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6078 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6079 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6080 6081 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6082 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6083 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6084 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6085 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6086 6087 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6088 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6089 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6090 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6091 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6092 6093 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6094 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6095 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6096 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6097 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6098 6099 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6100 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, 
asid); 6101 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6102 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6103 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6104 6105 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6106 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6107 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6108 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6109 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6110 6111 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6112 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6113 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6114 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6115 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6116 6117 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 6118 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 6119 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 6120 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 6121 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 6122 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 6123 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 6124 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 6125 6126 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6127 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6128 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6129 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6130 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6131 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 6132 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 6133 6134 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6135 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6136 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6137 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6138 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6139 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 6140 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 6141 6142 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6143 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6144 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6145 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6146 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6147 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 6148 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 6149 6150 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6151 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6152 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6153 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6154 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6155 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 6156 gaudi_mmu_prepare_reg(hdev, 
mmTPC3_CFG_AWUSER_LO, asid); 6157 6158 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6159 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6160 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6161 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6162 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6163 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 6164 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 6165 6166 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6167 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6168 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6169 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6170 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6171 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 6172 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 6173 6174 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6175 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6176 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6177 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6178 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6179 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 6180 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 6181 6182 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6183 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6184 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6185 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6186 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6187 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 6188 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 6189 6190 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6191 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6192 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6193 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6194 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6195 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6196 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6197 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6198 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6199 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6200 6201 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); 6202 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 6203 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 6204 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 6205 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 6206 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 6207 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 6208 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 6209 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 6210 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 6211 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 6212 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 6213 6214 if 
(gaudi->hw_cap_initialized & HW_CAP_NIC0) { 6215 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, 6216 asid); 6217 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, 6218 asid); 6219 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, 6220 asid); 6221 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, 6222 asid); 6223 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, 6224 asid); 6225 } 6226 6227 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) { 6228 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, 6229 asid); 6230 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, 6231 asid); 6232 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, 6233 asid); 6234 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, 6235 asid); 6236 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, 6237 asid); 6238 } 6239 6240 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) { 6241 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, 6242 asid); 6243 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, 6244 asid); 6245 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, 6246 asid); 6247 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, 6248 asid); 6249 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, 6250 asid); 6251 } 6252 6253 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) { 6254 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, 6255 asid); 6256 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, 6257 asid); 6258 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, 6259 asid); 6260 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, 6261 asid); 6262 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, 6263 asid); 6264 } 6265 6266 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) { 6267 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, 6268 asid); 6269 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, 6270 asid); 6271 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, 6272 asid); 6273 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, 6274 asid); 6275 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, 6276 asid); 6277 } 6278 6279 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) { 6280 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, 6281 asid); 6282 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, 6283 asid); 6284 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, 6285 asid); 6286 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, 6287 asid); 6288 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, 6289 asid); 6290 } 6291 6292 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) { 6293 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, 6294 asid); 6295 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, 6296 asid); 6297 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, 6298 asid); 6299 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, 6300 asid); 6301 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, 6302 asid); 6303 } 6304 6305 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) { 6306 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, 6307 asid); 6308 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, 6309 asid); 6310 gaudi_mmu_prepare_reg(hdev, 
mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, 6311 asid); 6312 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, 6313 asid); 6314 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, 6315 asid); 6316 } 6317 6318 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) { 6319 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, 6320 asid); 6321 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, 6322 asid); 6323 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, 6324 asid); 6325 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, 6326 asid); 6327 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, 6328 asid); 6329 } 6330 6331 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) { 6332 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, 6333 asid); 6334 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, 6335 asid); 6336 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, 6337 asid); 6338 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, 6339 asid); 6340 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, 6341 asid); 6342 } 6343 6344 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 6345 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 6346 } 6347 6348 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 6349 struct hl_cs_job *job) 6350 { 6351 struct packet_msg_prot *fence_pkt; 6352 u32 *fence_ptr; 6353 dma_addr_t fence_dma_addr; 6354 struct hl_cb *cb; 6355 u32 tmp, timeout, dma_offset; 6356 int rc; 6357 6358 if (hdev->pldm) 6359 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 6360 else 6361 timeout = HL_DEVICE_TIMEOUT_USEC; 6362 6363 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 6364 if (!fence_ptr) { 6365 dev_err(hdev->dev, 6366 "Failed to allocate fence memory for QMAN0\n"); 6367 return -ENOMEM; 6368 } 6369 6370 cb = job->patched_cb; 6371 6372 fence_pkt = cb->kernel_address + 6373 job->job_cb_size - sizeof(struct packet_msg_prot); 6374 6375 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 6376 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 6377 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 6378 6379 fence_pkt->ctl = cpu_to_le32(tmp); 6380 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 6381 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 6382 6383 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 6384 6385 WREG32(mmDMA0_CORE_PROT + dma_offset, 6386 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6387 6388 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 6389 job->job_cb_size, cb->bus_address); 6390 if (rc) { 6391 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 6392 goto free_fence_ptr; 6393 } 6394 6395 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 6396 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 6397 timeout, true); 6398 6399 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 6400 6401 if (rc == -ETIMEDOUT) { 6402 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 6403 goto free_fence_ptr; 6404 } 6405 6406 free_fence_ptr: 6407 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); 6408 6409 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 6410 return rc; 6411 } 6412 6413 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 6414 { 6415 if (event_type >= GAUDI_EVENT_SIZE) 6416 goto event_not_supported; 6417 6418 if (!gaudi_irq_map_table[event_type].valid) 6419 goto 
event_not_supported; 6420 6421 snprintf(desc, size, gaudi_irq_map_table[event_type].name); 6422 6423 return; 6424 6425 event_not_supported: 6426 snprintf(desc, size, "N/A"); 6427 } 6428 6429 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, 6430 bool is_write, u16 *engine_id_1, 6431 u16 *engine_id_2) 6432 { 6433 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 6434 6435 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 6436 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 6437 6438 switch (x_y) { 6439 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6440 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6441 dma_id[0] = 0; 6442 dma_id[1] = 2; 6443 break; 6444 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6445 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6446 dma_id[0] = 1; 6447 dma_id[1] = 3; 6448 break; 6449 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6450 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6451 dma_id[0] = 4; 6452 dma_id[1] = 6; 6453 break; 6454 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6455 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6456 dma_id[0] = 5; 6457 dma_id[1] = 7; 6458 break; 6459 default: 6460 goto unknown_initiator; 6461 } 6462 6463 for (i = 0 ; i < 2 ; i++) { 6464 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 6465 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6466 } 6467 6468 switch (x_y) { 6469 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6470 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6471 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6472 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6473 return "DMA0"; 6474 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6475 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; 6476 return "DMA2"; 6477 } else { 6478 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6479 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; 6480 return "DMA0 or DMA2"; 6481 } 6482 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6483 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6484 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6485 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6486 return "DMA1"; 6487 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6488 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; 6489 return "DMA3"; 6490 } else { 6491 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6492 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; 6493 return "DMA1 or DMA3"; 6494 } 6495 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6496 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6497 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6498 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6499 return "DMA4"; 6500 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6501 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; 6502 return "DMA6"; 6503 } else { 6504 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6505 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; 6506 return "DMA4 or DMA6"; 6507 } 6508 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6509 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6510 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6511 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6512 return "DMA5"; 6513 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6514 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; 6515 return "DMA7"; 6516 } else { 6517 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6518 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; 6519 return "DMA5 or DMA7"; 6520 } 6521 } 6522 6523 unknown_initiator: 6524 return "unknown initiator"; 6525 } 6526 6527 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, 6528 u16 *engine_id_1, u16 *engine_id_2) 6529 { 6530 u32 val, x_y, axi_id; 6531 6532 val = is_write ? 
RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 6533 RREG32(mmMMU_UP_RAZWI_READ_ID); 6534 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 6535 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 6536 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 6537 RAZWI_INITIATOR_AXI_ID_SHIFT); 6538 6539 switch (x_y) { 6540 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 6541 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6542 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; 6543 return "TPC0"; 6544 } 6545 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6546 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; 6547 return "NIC0"; 6548 } 6549 break; 6550 case RAZWI_INITIATOR_ID_X_Y_TPC1: 6551 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; 6552 return "TPC1"; 6553 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 6554 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 6555 *engine_id_1 = GAUDI_ENGINE_ID_MME_0; 6556 return "MME0"; 6557 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 6558 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 6559 *engine_id_1 = GAUDI_ENGINE_ID_MME_1; 6560 return "MME1"; 6561 case RAZWI_INITIATOR_ID_X_Y_TPC2: 6562 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; 6563 return "TPC2"; 6564 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 6565 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6566 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; 6567 return "TPC3"; 6568 } 6569 /* PCI, CPU or PSOC does not have engine id*/ 6570 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 6571 return "PCI"; 6572 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 6573 return "CPU"; 6574 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 6575 return "PSOC"; 6576 break; 6577 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6578 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6579 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6580 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6581 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6582 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6583 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6584 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6585 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, 6586 engine_id_1, engine_id_2); 6587 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 6588 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6589 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; 6590 return "TPC4"; 6591 } 6592 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6593 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; 6594 return "NIC1"; 6595 } 6596 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6597 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; 6598 return "NIC2"; 6599 } 6600 break; 6601 case RAZWI_INITIATOR_ID_X_Y_TPC5: 6602 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; 6603 return "TPC5"; 6604 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 6605 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 6606 *engine_id_1 = GAUDI_ENGINE_ID_MME_2; 6607 return "MME2"; 6608 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 6609 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 6610 *engine_id_1 = GAUDI_ENGINE_ID_MME_3; 6611 return "MME3"; 6612 case RAZWI_INITIATOR_ID_X_Y_TPC6: 6613 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; 6614 return "TPC6"; 6615 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 6616 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6617 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; 6618 return "TPC7"; 6619 } 6620 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6621 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; 6622 return "NIC4"; 6623 } 6624 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6625 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; 6626 return "NIC5"; 6627 } 6628 break; 6629 default: 6630 break; 6631 } 6632 6633 dev_err(hdev->dev, 6634 
"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 6635 val, 6636 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 6637 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 6638 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 6639 RAZWI_INITIATOR_AXI_ID_MASK); 6640 6641 return "unknown initiator"; 6642 } 6643 6644 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1, 6645 u16 *engine_id_2, bool *is_read, bool *is_write) 6646 { 6647 6648 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 6649 dev_err_ratelimited(hdev->dev, 6650 "RAZWI event caused by illegal write of %s\n", 6651 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); 6652 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 6653 *is_write = true; 6654 } 6655 6656 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 6657 dev_err_ratelimited(hdev->dev, 6658 "RAZWI event caused by illegal read of %s\n", 6659 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); 6660 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 6661 *is_read = true; 6662 } 6663 } 6664 6665 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) 6666 { 6667 struct gaudi_device *gaudi = hdev->asic_specific; 6668 u32 val; 6669 6670 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6671 return; 6672 6673 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 6674 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6675 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 6676 *addr <<= 32; 6677 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 6678 6679 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); 6680 hl_handle_page_fault(hdev, *addr, 0, true, event_mask); 6681 6682 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 6683 } 6684 6685 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 6686 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6687 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 6688 *addr <<= 32; 6689 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 6690 6691 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); 6692 6693 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 6694 } 6695 } 6696 6697 /* 6698 * +-------------------+------------------------------------------------------+ 6699 * | Configuration Reg | Description | 6700 * | Address | | 6701 * +-------------------+------------------------------------------------------+ 6702 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| 6703 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 6704 * | |0xF34 memory wrappers 63:32 | 6705 * | |0xF38 memory wrappers 95:64 | 6706 * | |0xF3C memory wrappers 127:96 | 6707 * +-------------------+------------------------------------------------------+ 6708 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 6709 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 6710 * | |0xF44 memory wrappers 63:32 | 6711 * | |0xF48 memory wrappers 95:64 | 6712 * | |0xF4C memory wrappers 127:96 | 6713 * +-------------------+------------------------------------------------------+ 6714 */ 6715 static int gaudi_extract_ecc_info(struct hl_device *hdev, 6716 struct ecc_info_extract_params *params, u64 *ecc_address, 6717 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 6718 { 6719 u32 i, num_mem_regs, reg, err_bit; 6720 u64 err_addr, err_word = 0; 6721 6722 num_mem_regs = params->num_memories / 32 + 6723 ((params->num_memories % 32) ? 
1 : 0); 6724 6725 if (params->block_address >= CFG_BASE) 6726 params->block_address -= CFG_BASE; 6727 6728 if (params->derr) 6729 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 6730 else 6731 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 6732 6733 /* Set invalid wrapper index */ 6734 *memory_wrapper_idx = 0xFF; 6735 6736 /* Iterate through memory wrappers, a single bit must be set */ 6737 for (i = 0 ; i < num_mem_regs ; i++) { 6738 err_addr += i * 4; 6739 err_word = RREG32(err_addr); 6740 if (err_word) { 6741 err_bit = __ffs(err_word); 6742 *memory_wrapper_idx = err_bit + (32 * i); 6743 break; 6744 } 6745 } 6746 6747 if (*memory_wrapper_idx == 0xFF) { 6748 dev_err(hdev->dev, "ECC error information cannot be found\n"); 6749 return -EINVAL; 6750 } 6751 6752 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 6753 *memory_wrapper_idx); 6754 6755 *ecc_address = 6756 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 6757 *ecc_syndrom = 6758 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 6759 6760 /* Clear error indication */ 6761 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 6762 if (params->derr) 6763 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 6764 else 6765 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 6766 6767 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 6768 6769 return 0; 6770 } 6771 6772 /* 6773 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6774 * 6775 * @idx: the current pi/ci value 6776 * @q_len: the queue length (power of 2) 6777 * 6778 * @return the cyclically decremented index 6779 */ 6780 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) 6781 { 6782 u32 mask = q_len - 1; 6783 6784 /* 6785 * modular decrement is equivalent to adding (queue_size -1) 6786 * later we take LSBs to make sure the value is in the 6787 * range [0, queue_len - 1] 6788 */ 6789 return (idx + q_len - 1) & mask; 6790 } 6791 6792 /** 6793 * gaudi_handle_sw_config_stream_data - print SW config stream data 6794 * 6795 * @hdev: pointer to the habanalabs device structure 6796 * @stream: the QMAN's stream 6797 * @qman_base: base address of QMAN registers block 6798 * @event_mask: mask of the last events occurred 6799 */ 6800 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream, 6801 u64 qman_base, u64 event_mask) 6802 { 6803 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6804 u32 cq_ptr_lo_off, size; 6805 6806 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0; 6807 6808 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 6809 stream * cq_ptr_lo_off; 6810 cq_ptr_hi = cq_ptr_lo + 6811 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0); 6812 cq_tsize = cq_ptr_lo + 6813 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0); 6814 6815 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6816 size = RREG32(cq_tsize); 6817 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", 6818 stream, cq_ptr, size); 6819 6820 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6821 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; 6822 hdev->captured_err_info.undef_opcode.cq_size = size; 6823 hdev->captured_err_info.undef_opcode.stream_id = stream; 6824 } 6825 } 6826 6827 /** 6828 * gaudi_handle_last_pqes_on_err - print last PQEs on error 6829 * 6830 * @hdev: pointer to the habanalabs device structure 6831 * @qid_base: first QID of the QMAN (out of 4 streams) 6832 * @stream: the QMAN's stream 6833 * @qman_base: 
base address of QMAN registers block 6834 * @event_mask: mask of the last events occurred 6835 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 6836 */ 6837 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, 6838 u32 stream, u64 qman_base, 6839 u64 event_mask, 6840 bool pr_sw_conf) 6841 { 6842 u32 ci, qm_ci_stream_off, queue_len; 6843 struct hl_hw_queue *q; 6844 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE]; 6845 int i; 6846 6847 q = &hdev->kernel_queues[qid_base + stream]; 6848 6849 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0; 6850 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) + 6851 stream * qm_ci_stream_off; 6852 6853 queue_len = (q->queue_type == QUEUE_TYPE_INT) ? 6854 q->int_queue_len : HL_QUEUE_LENGTH; 6855 6856 hdev->asic_funcs->hw_queues_lock(hdev); 6857 6858 if (pr_sw_conf) 6859 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6860 6861 ci = RREG32(pq_ci); 6862 6863 /* we should start printing from ci - 1 */ 6864 ci = gaudi_queue_idx_dec(ci, queue_len); 6865 memset(addr, 0, sizeof(addr)); 6866 6867 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6868 struct hl_bd *bd; 6869 u32 len; 6870 6871 bd = q->kernel_address; 6872 bd += ci; 6873 6874 len = le32_to_cpu(bd->len); 6875 /* len 0 means uninitialized entry - break */ 6876 if (!len) 6877 break; 6878 6879 addr[i] = le64_to_cpu(bd->ptr); 6880 6881 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", 6882 stream, ci, addr[i], len); 6883 6884 /* get previous ci, wrap if needed */ 6885 ci = gaudi_queue_idx_dec(ci, queue_len); 6886 } 6887 6888 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6889 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; 6890 u32 arr_idx = undef_opcode->cb_addr_streams_len; 6891 6892 if (arr_idx == 0) { 6893 undef_opcode->timestamp = ktime_get(); 6894 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base]; 6895 } 6896 6897 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr)); 6898 undef_opcode->cb_addr_streams_len++; 6899 } 6900 6901 hdev->asic_funcs->hw_queues_unlock(hdev); 6902 } 6903 6904 /** 6905 * handle_qman_data_on_err - extract QMAN data on error 6906 * 6907 * @hdev: pointer to the habanalabs device structure 6908 * @qid_base: first QID of the QMAN (out of 4 streams) 6909 * @stream: the QMAN's stream 6910 * @qman_base: base address of QMAN registers block 6911 * @event_mask: mask of the last events occurred 6912 * 6913 * This function attempts to extract as much data as possible on QMAN error. 6914 * On upper CP print the SW config stream data and last 8 PQEs.
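 * (The figure of 8 corresponds to PQ_FETCHER_CACHE_SIZE, the number of PQ
 * entries walked per stream by gaudi_handle_last_pqes_on_err(); noted here
 * for clarity, not part of the original comment.)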
6915 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 6916 */ 6917 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base, 6918 u32 stream, u64 qman_base, u64 event_mask) 6919 { 6920 u32 i; 6921 6922 if (stream != QMAN_STREAMS) { 6923 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, 6924 qman_base, event_mask, true); 6925 return; 6926 } 6927 6928 /* handle Lower-CP */ 6929 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6930 6931 for (i = 0; i < QMAN_STREAMS; i++) 6932 gaudi_handle_last_pqes_on_err(hdev, qid_base, i, 6933 qman_base, event_mask, false); 6934 } 6935 6936 static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 6937 const char *qm_name, 6938 u64 qman_base, 6939 u32 qid_base, 6940 u64 *event_mask) 6941 { 6942 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 6943 u64 glbl_sts_addr, arb_err_addr; 6944 char reg_desc[32]; 6945 6946 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE); 6947 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE); 6948 6949 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 6950 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 6951 glbl_sts_clr_val = 0; 6952 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 6953 6954 if (!glbl_sts_val) 6955 continue; 6956 6957 if (i == QMAN_STREAMS) 6958 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 6959 else 6960 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 6961 6962 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 6963 if (glbl_sts_val & BIT(j)) { 6964 dev_err_ratelimited(hdev->dev, 6965 "%s %s. err cause: %s\n", 6966 qm_name, reg_desc, 6967 gaudi_qman_error_cause[j]); 6968 glbl_sts_clr_val |= BIT(j); 6969 } 6970 } 6971 /* check for undefined opcode */ 6972 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && 6973 hdev->captured_err_info.undef_opcode.write_enable) { 6974 memset(&hdev->captured_err_info.undef_opcode, 0, 6975 sizeof(hdev->captured_err_info.undef_opcode)); 6976 6977 hdev->captured_err_info.undef_opcode.write_enable = false; 6978 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; 6979 } 6980 6981 /* Write 1 clear errors */ 6982 if (!hdev->stop_on_err) 6983 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 6984 else 6985 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask); 6986 } 6987 6988 arb_err_val = RREG32(arb_err_addr); 6989 6990 if (!arb_err_val) 6991 return; 6992 6993 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 6994 if (arb_err_val & BIT(j)) { 6995 dev_err_ratelimited(hdev->dev, 6996 "%s ARB_ERR. 
err cause: %s\n", 6997 qm_name, 6998 gaudi_qman_arb_error_cause[j]); 6999 } 7000 } 7001 } 7002 7003 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, 7004 struct hl_eq_sm_sei_data *sei_data) 7005 { 7006 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; 7007 7008 /* Flip the bits as the enum is ordered in the opposite way */ 7009 index = (index ^ 0x3) & 0x3; 7010 7011 switch (sei_data->sei_cause) { 7012 case SM_SEI_SO_OVERFLOW: 7013 dev_err_ratelimited(hdev->dev, 7014 "%s SEI Error: SOB Group %u overflow/underflow", 7015 gaudi_sync_manager_names[index], 7016 le32_to_cpu(sei_data->sei_log)); 7017 break; 7018 case SM_SEI_LBW_4B_UNALIGNED: 7019 dev_err_ratelimited(hdev->dev, 7020 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", 7021 gaudi_sync_manager_names[index], 7022 le32_to_cpu(sei_data->sei_log)); 7023 break; 7024 case SM_SEI_AXI_RESPONSE_ERR: 7025 dev_err_ratelimited(hdev->dev, 7026 "%s SEI Error: AXI ID %u response error", 7027 gaudi_sync_manager_names[index], 7028 le32_to_cpu(sei_data->sei_log)); 7029 break; 7030 default: 7031 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", 7032 le32_to_cpu(sei_data->sei_log)); 7033 break; 7034 } 7035 } 7036 7037 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7038 struct hl_eq_ecc_data *ecc_data) 7039 { 7040 struct ecc_info_extract_params params; 7041 u64 ecc_address = 0, ecc_syndrom = 0; 7042 u8 index, memory_wrapper_idx = 0; 7043 bool extract_info_from_fw; 7044 int rc; 7045 7046 if (hdev->asic_prop.fw_security_enabled) { 7047 extract_info_from_fw = true; 7048 goto extract_ecc_info; 7049 } 7050 7051 switch (event_type) { 7052 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 7053 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: 7054 extract_info_from_fw = true; 7055 break; 7056 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7057 index = event_type - GAUDI_EVENT_TPC0_SERR; 7058 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7059 params.num_memories = 90; 7060 params.derr = false; 7061 extract_info_from_fw = false; 7062 break; 7063 case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: 7064 index = event_type - GAUDI_EVENT_TPC0_DERR; 7065 params.block_address = 7066 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7067 params.num_memories = 90; 7068 params.derr = true; 7069 extract_info_from_fw = false; 7070 break; 7071 case GAUDI_EVENT_MME0_ACC_SERR: 7072 case GAUDI_EVENT_MME1_ACC_SERR: 7073 case GAUDI_EVENT_MME2_ACC_SERR: 7074 case GAUDI_EVENT_MME3_ACC_SERR: 7075 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; 7076 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7077 params.num_memories = 128; 7078 params.derr = false; 7079 extract_info_from_fw = false; 7080 break; 7081 case GAUDI_EVENT_MME0_ACC_DERR: 7082 case GAUDI_EVENT_MME1_ACC_DERR: 7083 case GAUDI_EVENT_MME2_ACC_DERR: 7084 case GAUDI_EVENT_MME3_ACC_DERR: 7085 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; 7086 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7087 params.num_memories = 128; 7088 params.derr = true; 7089 extract_info_from_fw = false; 7090 break; 7091 case GAUDI_EVENT_MME0_SBAB_SERR: 7092 case GAUDI_EVENT_MME1_SBAB_SERR: 7093 case GAUDI_EVENT_MME2_SBAB_SERR: 7094 case GAUDI_EVENT_MME3_SBAB_SERR: 7095 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; 7096 params.block_address = 7097 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7098 params.num_memories = 33; 7099 params.derr = false; 7100 extract_info_from_fw = false; 7101 break; 7102 case GAUDI_EVENT_MME0_SBAB_DERR: 7103 case GAUDI_EVENT_MME1_SBAB_DERR: 7104 case GAUDI_EVENT_MME2_SBAB_DERR: 7105 case GAUDI_EVENT_MME3_SBAB_DERR: 7106 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; 7107 params.block_address = 7108 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7109 params.num_memories = 33; 7110 params.derr = true; 7111 extract_info_from_fw = false; 7112 break; 7113 default: 7114 return; 7115 } 7116 7117 extract_ecc_info: 7118 if (extract_info_from_fw) { 7119 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7120 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7121 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7122 } else { 7123 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address, 7124 &ecc_syndrom, &memory_wrapper_idx); 7125 if (rc) 7126 return; 7127 } 7128 7129 dev_err(hdev->dev, 7130 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n", 7131 ecc_address, ecc_syndrom, memory_wrapper_idx); 7132 } 7133 7134 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7135 { 7136 u64 qman_base; 7137 char desc[32]; 7138 u32 qid_base; 7139 u8 index; 7140 7141 switch (event_type) { 7142 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7143 index = event_type - GAUDI_EVENT_TPC0_QM; 7144 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7145 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; 7146 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); 7147 break; 7148 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7149 if (event_type == GAUDI_EVENT_MME0_QM) { 7150 index = 0; 7151 qid_base = GAUDI_QUEUE_ID_MME_0_0; 7152 } else { /* event_type == GAUDI_EVENT_MME2_QM */ 7153 index = 2; 7154 qid_base = GAUDI_QUEUE_ID_MME_1_0; 7155 } 7156 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET; 7157 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); 7158 break; 7159 case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: 7160 index = event_type - GAUDI_EVENT_DMA0_QM; 7161 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS; 7162 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */ 7163 if (index > 1) 7164 qid_base++; 7165 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET; 7166 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); 7167 break; 7168 case GAUDI_EVENT_NIC0_QM0: 7169 qid_base = GAUDI_QUEUE_ID_NIC_0_0; 7170 qman_base = mmNIC0_QM0_BASE; 7171 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0"); 7172 break; 7173 case GAUDI_EVENT_NIC0_QM1: 7174 qid_base = GAUDI_QUEUE_ID_NIC_1_0; 7175 qman_base = mmNIC0_QM1_BASE; 7176 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1"); 7177 break; 7178 case GAUDI_EVENT_NIC1_QM0: 7179 qid_base = GAUDI_QUEUE_ID_NIC_2_0; 7180 qman_base = mmNIC1_QM0_BASE; 7181 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0"); 7182 break; 7183 case GAUDI_EVENT_NIC1_QM1: 7184 qid_base = GAUDI_QUEUE_ID_NIC_3_0; 7185 qman_base = mmNIC1_QM1_BASE; 7186 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1"); 7187 break; 7188 case GAUDI_EVENT_NIC2_QM0: 7189 qid_base = GAUDI_QUEUE_ID_NIC_4_0; 7190 qman_base = mmNIC2_QM0_BASE; 7191 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0"); 7192 break; 7193 case GAUDI_EVENT_NIC2_QM1: 7194 qid_base = GAUDI_QUEUE_ID_NIC_5_0; 7195 qman_base = mmNIC2_QM1_BASE; 7196 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1"); 7197 break; 7198 case GAUDI_EVENT_NIC3_QM0: 7199 qid_base = GAUDI_QUEUE_ID_NIC_6_0; 7200 qman_base = mmNIC3_QM0_BASE; 7201 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0"); 7202 break; 7203 case GAUDI_EVENT_NIC3_QM1: 7204 qid_base = GAUDI_QUEUE_ID_NIC_7_0; 7205 qman_base = mmNIC3_QM1_BASE; 7206 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1"); 7207 break; 7208 case GAUDI_EVENT_NIC4_QM0: 7209 qid_base = GAUDI_QUEUE_ID_NIC_8_0; 7210 qman_base = mmNIC4_QM0_BASE; 7211 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0"); 7212 break; 7213 case GAUDI_EVENT_NIC4_QM1: 7214 qid_base = GAUDI_QUEUE_ID_NIC_9_0; 7215 qman_base = mmNIC4_QM1_BASE; 7216 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1"); 7217 break; 7218 default: 7219 return; 7220 } 7221 7222 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask); 7223 } 7224 7225 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, 7226 bool check_razwi, u64 *event_mask) 7227 { 7228 bool is_read = false, is_write = false; 7229 u16 engine_id[2], num_of_razwi_eng = 0; 7230 char desc[64] = ""; 7231 u64 razwi_addr = 0; 7232 u8 razwi_flags = 0; 7233 7234 /* 7235 * Init engine id by default as not valid and only if razwi initiated from engine with 7236 * engine id it will get valid value. 
7237 */ 7238 engine_id[0] = HL_RAZWI_NA_ENG_ID; 7239 engine_id[1] = HL_RAZWI_NA_ENG_ID; 7240 7241 gaudi_get_event_desc(event_type, desc, sizeof(desc)); 7242 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7243 event_type, desc); 7244 7245 if (check_razwi) { 7246 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, 7247 &is_write); 7248 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); 7249 7250 if (is_read) 7251 razwi_flags |= HL_RAZWI_READ; 7252 if (is_write) 7253 razwi_flags |= HL_RAZWI_WRITE; 7254 7255 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) { 7256 if (engine_id[1] != HL_RAZWI_NA_ENG_ID) 7257 num_of_razwi_eng = 2; 7258 else 7259 num_of_razwi_eng = 1; 7260 } 7261 7262 if (razwi_flags) 7263 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, 7264 razwi_flags, event_mask); 7265 } 7266 } 7267 7268 static void gaudi_print_out_of_sync_info(struct hl_device *hdev, 7269 struct cpucp_pkt_sync_err *sync_err) 7270 { 7271 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 7272 7273 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 7274 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 7275 } 7276 7277 static void gaudi_print_fw_alive_info(struct hl_device *hdev, 7278 struct hl_eq_fw_alive *fw_alive) 7279 { 7280 dev_err(hdev->dev, 7281 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", 7282 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical", 7283 le32_to_cpu(fw_alive->process_id), 7284 le32_to_cpu(fw_alive->thread_id), 7285 le64_to_cpu(fw_alive->uptime_seconds)); 7286 } 7287 7288 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, 7289 void *data) 7290 { 7291 char desc[64] = "", *type; 7292 struct eq_nic_sei_event *eq_nic_sei = data; 7293 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; 7294 7295 switch (eq_nic_sei->axi_error_cause) { 7296 case RXB: 7297 type = "RXB"; 7298 break; 7299 case RXE: 7300 type = "RXE"; 7301 break; 7302 case TXS: 7303 type = "TXS"; 7304 break; 7305 case TXE: 7306 type = "TXE"; 7307 break; 7308 case QPC_RESP: 7309 type = "QPC_RESP"; 7310 break; 7311 case NON_AXI_ERR: 7312 type = "NON_AXI_ERR"; 7313 break; 7314 case TMR: 7315 type = "TMR"; 7316 break; 7317 default: 7318 dev_err(hdev->dev, "unknown NIC AXI cause %d\n", 7319 eq_nic_sei->axi_error_cause); 7320 type = "N/A"; 7321 break; 7322 } 7323 7324 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, 7325 eq_nic_sei->id); 7326 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7327 event_type, desc); 7328 } 7329 7330 static int gaudi_compute_reset_late_init(struct hl_device *hdev) 7331 { 7332 /* GAUDI doesn't support any reset except hard-reset */ 7333 return -EPERM; 7334 } 7335 7336 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, 7337 struct hl_eq_hbm_ecc_data *hbm_ecc_data) 7338 { 7339 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7340 int rc = 0; 7341 7342 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 7343 CPU_BOOT_DEV_STS0_HBM_ECC_EN) { 7344 if (!hbm_ecc_data) { 7345 dev_err(hdev->dev, "No FW ECC data"); 7346 return 0; 7347 } 7348 7349 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK, 7350 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7351 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK, 7352 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7353 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK, 7354 
le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7355 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK, 7356 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7357 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK, 7358 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7359 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK, 7360 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7361 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK, 7362 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7363 7364 dev_err(hdev->dev, 7365 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7366 device, ch, wr_par, rd_par, ca_par, serr, derr); 7367 dev_err(hdev->dev, 7368 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7369 device, ch, hbm_ecc_data->first_addr, type, 7370 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7371 hbm_ecc_data->dec_cnt); 7372 return 0; 7373 } 7374 7375 if (hdev->asic_prop.fw_security_enabled) { 7376 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7377 return 0; 7378 } 7379 7380 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; 7381 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { 7382 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); 7383 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7384 if (val) { 7385 rc = -EIO; 7386 dev_err(hdev->dev, 7387 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7388 device, ch * 2, val & 0x1, (val >> 1) & 0x1, 7389 (val >> 2) & 0x1, (val >> 3) & 0x1, 7390 (val >> 4) & 0x1); 7391 7392 val2 = RREG32(base + ch * 0x1000 + 0x060); 7393 dev_err(hdev->dev, 7394 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7395 device, ch * 2, 7396 RREG32(base + ch * 0x1000 + 0x064), 7397 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7398 (val2 & 0xFF0000) >> 16, 7399 (val2 & 0xFF000000) >> 24); 7400 } 7401 7402 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); 7403 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7404 if (val) { 7405 rc = -EIO; 7406 dev_err(hdev->dev, 7407 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7408 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, 7409 (val >> 2) & 0x1, (val >> 3) & 0x1, 7410 (val >> 4) & 0x1); 7411 7412 val2 = RREG32(base + ch * 0x1000 + 0x070); 7413 dev_err(hdev->dev, 7414 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7415 device, ch * 2 + 1, 7416 RREG32(base + ch * 0x1000 + 0x074), 7417 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7418 (val2 & 0xFF0000) >> 16, 7419 (val2 & 0xFF000000) >> 24); 7420 } 7421 7422 /* Clear interrupts */ 7423 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); 7424 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); 7425 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); 7426 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); 7427 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); 7428 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); 7429 } 7430 7431 val = RREG32(base + 0x8F30); 7432 val2 = RREG32(base + 0x8F34); 7433 if (val | val2) { 7434 rc = -EIO; 7435 dev_err(hdev->dev, 7436 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", 7437 device, val, val2); 7438 } 7439 val = RREG32(base + 0x8F40); 7440 val2 = RREG32(base + 0x8F44); 7441 if (val | val2) { 7442 rc = -EIO; 7443 dev_err(hdev->dev, 7444 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", 7445 device, val, val2); 7446 } 7447 7448 return rc; 7449 } 
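
/*
 * Editor's illustrative sketch (not part of the driver flow; the helper name
 * is hypothetical and the function is unused): it only restates how
 * gaudi_hbm_read_interrupts() above decodes the per-pseudo-channel interrupt
 * word read from offsets 0x06C/0x07C. The two bytes of the word are OR-ed
 * together and bits 0..4 then report WR_PAR, RD_PAR, CA_PAR, SERR and DERR,
 * matching the dev_err() prints above.
 */
static inline void gaudi_hbm_decode_irq_word_sketch(struct hl_device *hdev,
						int device, int pc, u32 raw)
{
	u32 val = (raw & 0xFF) | ((raw >> 8) & 0xFF);

	dev_dbg(hdev->dev,
		"HBM%d pc%d: WR_PAR=%u RD_PAR=%u CA_PAR=%u SERR=%u DERR=%u\n",
		device, pc, val & 0x1, (val >> 1) & 0x1, (val >> 2) & 0x1,
		(val >> 3) & 0x1, (val >> 4) & 0x1);
}
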
static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}

static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
					char *interrupt_name)
{
	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
	bool soft_reset_required = false;

	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (tpc_interrupts_cause & BIT(i)) {
			dev_err_ratelimited(hdev->dev,
					"TPC%d_%s interrupt cause: %s\n",
					tpc_id, interrupt_name,
					gaudi_tpc_interrupts_cause[i]);
			/* If this is a QM error, we need to soft-reset */
			if (i == 15)
				soft_reset_required = true;
		}

	/* Clear interrupts */
	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

	return soft_reset_required;
}

static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}

static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}

static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
dev_err(hdev->dev, "Received invalid clock change event %d\n", 7553 event_type); 7554 break; 7555 } 7556 7557 mutex_unlock(&hdev->clk_throttling.lock); 7558 } 7559 7560 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 7561 { 7562 struct gaudi_device *gaudi = hdev->asic_specific; 7563 struct hl_info_fw_err_info fw_err_info; 7564 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0; 7565 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); 7566 u32 fw_fatal_err_flag = 0, flags = 0; 7567 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) 7568 >> EQ_CTL_EVENT_TYPE_SHIFT); 7569 bool reset_required, reset_direct = false; 7570 u8 cause; 7571 int rc; 7572 7573 if (event_type >= GAUDI_EVENT_SIZE) { 7574 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 7575 event_type, GAUDI_EVENT_SIZE - 1); 7576 return; 7577 } 7578 7579 gaudi->events_stat[event_type]++; 7580 gaudi->events_stat_aggregate[event_type]++; 7581 7582 switch (event_type) { 7583 case GAUDI_EVENT_PCIE_CORE_DERR: 7584 case GAUDI_EVENT_PCIE_IF_DERR: 7585 case GAUDI_EVENT_PCIE_PHY_DERR: 7586 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: 7587 case GAUDI_EVENT_MME0_ACC_DERR: 7588 case GAUDI_EVENT_MME0_SBAB_DERR: 7589 case GAUDI_EVENT_MME1_ACC_DERR: 7590 case GAUDI_EVENT_MME1_SBAB_DERR: 7591 case GAUDI_EVENT_MME2_ACC_DERR: 7592 case GAUDI_EVENT_MME2_SBAB_DERR: 7593 case GAUDI_EVENT_MME3_ACC_DERR: 7594 case GAUDI_EVENT_MME3_SBAB_DERR: 7595 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: 7596 fallthrough; 7597 case GAUDI_EVENT_CPU_IF_ECC_DERR: 7598 case GAUDI_EVENT_PSOC_MEM_DERR: 7599 case GAUDI_EVENT_PSOC_CORESIGHT_DERR: 7600 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: 7601 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: 7602 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: 7603 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: 7604 case GAUDI_EVENT_MMU_DERR: 7605 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7606 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7607 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7608 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7609 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7610 goto reset_device; 7611 7612 case GAUDI_EVENT_GIC500: 7613 case GAUDI_EVENT_AXI_ECC: 7614 case GAUDI_EVENT_L2_RAM_ECC: 7615 case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: 7616 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7617 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7618 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7619 goto reset_device; 7620 7621 case GAUDI_EVENT_HBM0_SPI_0: 7622 case GAUDI_EVENT_HBM1_SPI_0: 7623 case GAUDI_EVENT_HBM2_SPI_0: 7624 case GAUDI_EVENT_HBM3_SPI_0: 7625 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7626 gaudi_hbm_read_interrupts(hdev, 7627 gaudi_hbm_event_to_dev(event_type), 7628 &eq_entry->hbm_ecc_data); 7629 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7630 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7631 goto reset_device; 7632 7633 case GAUDI_EVENT_HBM0_SPI_1: 7634 case GAUDI_EVENT_HBM1_SPI_1: 7635 case GAUDI_EVENT_HBM2_SPI_1: 7636 case GAUDI_EVENT_HBM3_SPI_1: 7637 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7638 gaudi_hbm_read_interrupts(hdev, 7639 gaudi_hbm_event_to_dev(event_type), 7640 &eq_entry->hbm_ecc_data); 7641 hl_fw_unmask_irq(hdev, event_type); 7642 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7643 break; 7644 7645 case GAUDI_EVENT_TPC0_DEC: 7646 case GAUDI_EVENT_TPC1_DEC: 7647 case GAUDI_EVENT_TPC2_DEC: 7648 case GAUDI_EVENT_TPC3_DEC: 7649 case GAUDI_EVENT_TPC4_DEC: 7650 case GAUDI_EVENT_TPC5_DEC: 7651 case GAUDI_EVENT_TPC6_DEC: 7652 case GAUDI_EVENT_TPC7_DEC: 7653 /* In TPC DEC event, notify on TPC assertion. While there isn't 7654 * a specific event for assertion yet, the FW generates TPC DEC event. 7655 * The SW upper layer will inspect an internal mapped area to indicate 7656 * if the event is a TPC Assertion or a "real" TPC DEC. 7657 */ 7658 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7659 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7660 reset_required = gaudi_tpc_read_interrupts(hdev, 7661 tpc_dec_event_to_tpc_id(event_type), 7662 "AXI_SLV_DEC_Error"); 7663 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7664 if (reset_required) { 7665 dev_err(hdev->dev, "reset required due to %s\n", 7666 gaudi_irq_map_table[event_type].name); 7667 7668 reset_direct = true; 7669 goto reset_device; 7670 } else { 7671 hl_fw_unmask_irq(hdev, event_type); 7672 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7673 } 7674 break; 7675 7676 case GAUDI_EVENT_TPC0_KRN_ERR: 7677 case GAUDI_EVENT_TPC1_KRN_ERR: 7678 case GAUDI_EVENT_TPC2_KRN_ERR: 7679 case GAUDI_EVENT_TPC3_KRN_ERR: 7680 case GAUDI_EVENT_TPC4_KRN_ERR: 7681 case GAUDI_EVENT_TPC5_KRN_ERR: 7682 case GAUDI_EVENT_TPC6_KRN_ERR: 7683 case GAUDI_EVENT_TPC7_KRN_ERR: 7684 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7685 reset_required = gaudi_tpc_read_interrupts(hdev, 7686 tpc_krn_event_to_tpc_id(event_type), 7687 "KRN_ERR"); 7688 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7689 if (reset_required) { 7690 dev_err(hdev->dev, "reset required due to %s\n", 7691 gaudi_irq_map_table[event_type].name); 7692 7693 reset_direct = true; 7694 goto reset_device; 7695 } else { 7696 hl_fw_unmask_irq(hdev, event_type); 7697 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7698 } 7699 break; 7700 7701 case GAUDI_EVENT_PCIE_CORE_SERR: 7702 case GAUDI_EVENT_PCIE_IF_SERR: 7703 case GAUDI_EVENT_PCIE_PHY_SERR: 7704 case GAUDI_EVENT_TPC0_SERR ... 
GAUDI_EVENT_TPC7_SERR: 7705 case GAUDI_EVENT_MME0_ACC_SERR: 7706 case GAUDI_EVENT_MME0_SBAB_SERR: 7707 case GAUDI_EVENT_MME1_ACC_SERR: 7708 case GAUDI_EVENT_MME1_SBAB_SERR: 7709 case GAUDI_EVENT_MME2_ACC_SERR: 7710 case GAUDI_EVENT_MME2_SBAB_SERR: 7711 case GAUDI_EVENT_MME3_ACC_SERR: 7712 case GAUDI_EVENT_MME3_SBAB_SERR: 7713 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: 7714 case GAUDI_EVENT_CPU_IF_ECC_SERR: 7715 case GAUDI_EVENT_PSOC_MEM_SERR: 7716 case GAUDI_EVENT_PSOC_CORESIGHT_SERR: 7717 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: 7718 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: 7719 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: 7720 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: 7721 fallthrough; 7722 case GAUDI_EVENT_MMU_SERR: 7723 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7724 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7725 hl_fw_unmask_irq(hdev, event_type); 7726 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7727 break; 7728 7729 case GAUDI_EVENT_PCIE_DEC: 7730 case GAUDI_EVENT_CPU_AXI_SPLITTER: 7731 case GAUDI_EVENT_PSOC_AXI_DEC: 7732 case GAUDI_EVENT_PSOC_PRSTN_FALL: 7733 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7734 hl_fw_unmask_irq(hdev, event_type); 7735 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7736 break; 7737 7738 case GAUDI_EVENT_MMU_PAGE_FAULT: 7739 case GAUDI_EVENT_MMU_WR_PERM: 7740 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7741 hl_fw_unmask_irq(hdev, event_type); 7742 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7743 break; 7744 7745 case GAUDI_EVENT_MME0_WBC_RSP: 7746 case GAUDI_EVENT_MME0_SBAB0_RSP: 7747 case GAUDI_EVENT_MME1_WBC_RSP: 7748 case GAUDI_EVENT_MME1_SBAB0_RSP: 7749 case GAUDI_EVENT_MME2_WBC_RSP: 7750 case GAUDI_EVENT_MME2_SBAB0_RSP: 7751 case GAUDI_EVENT_MME3_WBC_RSP: 7752 case GAUDI_EVENT_MME3_SBAB0_RSP: 7753 case GAUDI_EVENT_RAZWI_OR_ADC: 7754 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7755 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: 7756 fallthrough; 7757 case GAUDI_EVENT_NIC0_QM0: 7758 case GAUDI_EVENT_NIC0_QM1: 7759 case GAUDI_EVENT_NIC1_QM0: 7760 case GAUDI_EVENT_NIC1_QM1: 7761 case GAUDI_EVENT_NIC2_QM0: 7762 case GAUDI_EVENT_NIC2_QM1: 7763 case GAUDI_EVENT_NIC3_QM0: 7764 case GAUDI_EVENT_NIC3_QM1: 7765 case GAUDI_EVENT_NIC4_QM0: 7766 case GAUDI_EVENT_NIC4_QM1: 7767 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: 7768 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7769 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7770 gaudi_handle_qman_err(hdev, event_type, &event_mask); 7771 hl_fw_unmask_irq(hdev, event_type); 7772 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); 7773 break; 7774 7775 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7776 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7777 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7778 goto reset_device; 7779 7780 case GAUDI_EVENT_TPC0_BMON_SPMU: 7781 case GAUDI_EVENT_TPC1_BMON_SPMU: 7782 case GAUDI_EVENT_TPC2_BMON_SPMU: 7783 case GAUDI_EVENT_TPC3_BMON_SPMU: 7784 case GAUDI_EVENT_TPC4_BMON_SPMU: 7785 case GAUDI_EVENT_TPC5_BMON_SPMU: 7786 case GAUDI_EVENT_TPC6_BMON_SPMU: 7787 case GAUDI_EVENT_TPC7_BMON_SPMU: 7788 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7789 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7790 hl_fw_unmask_irq(hdev, event_type); 7791 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7792 break; 7793 7794 case GAUDI_EVENT_NIC_SEI_0 ... 
GAUDI_EVENT_NIC_SEI_4: 7795 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7796 hl_fw_unmask_irq(hdev, event_type); 7797 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7798 break; 7799 7800 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: 7801 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7802 gaudi_print_sm_sei_info(hdev, event_type, 7803 &eq_entry->sm_sei_data); 7804 rc = hl_state_dump(hdev); 7805 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7806 if (rc) 7807 dev_err(hdev->dev, 7808 "Error during system state dump %d\n", rc); 7809 hl_fw_unmask_irq(hdev, event_type); 7810 break; 7811 7812 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: 7813 break; 7814 7815 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: 7816 gaudi_print_clk_change_info(hdev, event_type, &event_mask); 7817 hl_fw_unmask_irq(hdev, event_type); 7818 break; 7819 7820 case GAUDI_EVENT_PSOC_GPIO_U16_0: 7821 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; 7822 dev_err(hdev->dev, 7823 "Received high temp H/W interrupt %d (cause %d)\n", 7824 event_type, cause); 7825 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7826 break; 7827 7828 case GAUDI_EVENT_DEV_RESET_REQ: 7829 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7830 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7831 goto reset_device; 7832 7833 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7834 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7835 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7836 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7837 goto reset_device; 7838 7839 case GAUDI_EVENT_FW_ALIVE_S: 7840 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7841 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7842 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR; 7843 fw_err_info.event_id = event_type; 7844 fw_err_info.event_mask = &event_mask; 7845 hl_handle_fw_err(hdev, &fw_err_info); 7846 goto reset_device; 7847 7848 default: 7849 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", 7850 event_type); 7851 break; 7852 } 7853 7854 if (event_mask) 7855 hl_notifier_event_send_all(hdev, event_mask); 7856 7857 return; 7858 7859 reset_device: 7860 reset_required = true; 7861 7862 if (hdev->asic_prop.fw_security_enabled && !reset_direct) { 7863 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag; 7864 7865 /* notify on device unavailable while the reset triggered by fw */ 7866 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | 7867 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); 7868 } else if (hdev->hard_reset_on_fw_events) { 7869 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag; 7870 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7871 } else { 7872 reset_required = false; 7873 } 7874 7875 if (reset_required) { 7876 /* escalate general hw errors to critical/fatal error */ 7877 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) 7878 hl_handle_critical_hw_err(hdev, event_type, &event_mask); 7879 7880 hl_device_cond_reset(hdev, flags, event_mask); 7881 } else { 7882 hl_fw_unmask_irq(hdev, event_type); 7883 /* Notification on occurred event needs to be sent although reset is not executed */ 7884 if (event_mask) 7885 hl_notifier_event_send_all(hdev, event_mask); 7886 } 7887 } 7888 7889 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7890 { 7891 struct gaudi_device *gaudi = hdev->asic_specific; 7892 7893 if (aggregate) { 7894 *size = (u32) sizeof(gaudi->events_stat_aggregate); 7895 
return gaudi->events_stat_aggregate; 7896 } 7897 7898 *size = (u32) sizeof(gaudi->events_stat); 7899 return gaudi->events_stat; 7900 } 7901 7902 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 7903 { 7904 struct gaudi_device *gaudi = hdev->asic_specific; 7905 u32 status, timeout_usec; 7906 int rc; 7907 7908 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || 7909 hdev->reset_info.hard_reset_pending) 7910 return 0; 7911 7912 if (hdev->pldm) 7913 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7914 else 7915 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7916 7917 /* L0 & L1 invalidation */ 7918 WREG32(mmSTLB_INV_PS, 3); 7919 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); 7920 WREG32(mmSTLB_INV_PS, 2); 7921 7922 rc = hl_poll_timeout( 7923 hdev, 7924 mmSTLB_INV_PS, 7925 status, 7926 !status, 7927 1000, 7928 timeout_usec); 7929 7930 WREG32(mmSTLB_INV_SET, 0); 7931 7932 return rc; 7933 } 7934 7935 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, 7936 bool is_hard, u32 flags, 7937 u32 asid, u64 va, u64 size) 7938 { 7939 /* Treat as invalidate all because there is no range invalidation 7940 * in Gaudi 7941 */ 7942 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 7943 } 7944 7945 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr) 7946 { 7947 u32 status, timeout_usec; 7948 int rc; 7949 7950 if (hdev->pldm) 7951 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7952 else 7953 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7954 7955 WREG32(MMU_ASID, asid); 7956 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 7957 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); 7958 WREG32(MMU_BUSY, 0x80000000); 7959 7960 rc = hl_poll_timeout( 7961 hdev, 7962 MMU_BUSY, 7963 status, 7964 !(status & 0x80000000), 7965 1000, 7966 timeout_usec); 7967 7968 if (rc) { 7969 dev_err(hdev->dev, 7970 "Timeout during MMU hop0 config of asid %d\n", asid); 7971 return rc; 7972 } 7973 7974 return 0; 7975 } 7976 7977 static int gaudi_send_heartbeat(struct hl_device *hdev) 7978 { 7979 struct gaudi_device *gaudi = hdev->asic_specific; 7980 7981 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 7982 return 0; 7983 7984 return hl_fw_send_heartbeat(hdev); 7985 } 7986 7987 static int gaudi_cpucp_info_get(struct hl_device *hdev) 7988 { 7989 struct gaudi_device *gaudi = hdev->asic_specific; 7990 struct asic_fixed_properties *prop = &hdev->asic_prop; 7991 int rc; 7992 7993 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 7994 return 0; 7995 7996 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, 7997 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 7998 mmCPU_BOOT_ERR1); 7999 if (rc) 8000 return rc; 8001 8002 if (!strlen(prop->cpucp_info.card_name)) 8003 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 8004 CARD_NAME_MAX_LEN); 8005 8006 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); 8007 8008 set_default_power_values(hdev); 8009 8010 return 0; 8011 } 8012 8013 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 8014 struct engines_data *e) 8015 { 8016 struct gaudi_device *gaudi = hdev->asic_specific; 8017 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; 8018 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; 8019 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n"; 8020 unsigned long *mask = (unsigned long *)mask_arr; 8021 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; 8022 bool is_idle = true, is_eng_idle, is_slave; 8023 u64 offset; 8024 int 
i, dma_id, port; 8025 8026 if (e) 8027 hl_engine_data_sprintf(e, 8028 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8029 "--- ------- ------------ ---------- -------------\n"); 8030 8031 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { 8032 dma_id = gaudi_dma_assignment[i]; 8033 offset = dma_id * DMA_QMAN_OFFSET; 8034 8035 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); 8036 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); 8037 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); 8038 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8039 IS_DMA_IDLE(dma_core_sts0); 8040 is_idle &= is_eng_idle; 8041 8042 if (mask && !is_eng_idle) 8043 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8044 if (e) 8045 hl_engine_data_sprintf(e, fmt, dma_id, 8046 is_eng_idle ? "Y" : "N", qm_glbl_sts0, 8047 qm_cgm_sts, dma_core_sts0); 8048 } 8049 8050 if (e) 8051 hl_engine_data_sprintf(e, 8052 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8053 "--- ------- ------------ ---------- ----------\n"); 8054 8055 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 8056 offset = i * TPC_QMAN_OFFSET; 8057 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); 8058 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); 8059 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); 8060 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8061 IS_TPC_IDLE(tpc_cfg_sts); 8062 is_idle &= is_eng_idle; 8063 8064 if (mask && !is_eng_idle) 8065 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8066 if (e) 8067 hl_engine_data_sprintf(e, fmt, i, 8068 is_eng_idle ? "Y" : "N", 8069 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8070 } 8071 8072 if (e) 8073 hl_engine_data_sprintf(e, 8074 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8075 "--- ------- ------------ ---------- -----------\n"); 8076 8077 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { 8078 offset = i * MME_QMAN_OFFSET; 8079 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); 8080 is_eng_idle = IS_MME_IDLE(mme_arch_sts); 8081 8082 /* MME 1 & 3 are slaves, no need to check their QMANs */ 8083 is_slave = i % 2; 8084 if (!is_slave) { 8085 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); 8086 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); 8087 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8088 } 8089 8090 is_idle &= is_eng_idle; 8091 8092 if (mask && !is_eng_idle) 8093 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8094 if (e) { 8095 if (!is_slave) 8096 hl_engine_data_sprintf(e, fmt, i, 8097 is_eng_idle ? "Y" : "N", 8098 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8099 else 8100 hl_engine_data_sprintf(e, mme_slave_fmt, i, 8101 is_eng_idle ? "Y" : "N", "-", 8102 "-", mme_arch_sts); 8103 } 8104 } 8105 8106 if (e) 8107 hl_engine_data_sprintf(e, 8108 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8109 "--- ------- ------------ ----------\n"); 8110 8111 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { 8112 offset = i * NIC_MACRO_QMAN_OFFSET; 8113 port = 2 * i; 8114 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8115 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 8116 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 8117 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8118 is_idle &= is_eng_idle; 8119 8120 if (mask && !is_eng_idle) 8121 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8122 if (e) 8123 hl_engine_data_sprintf(e, nic_fmt, port, 8124 is_eng_idle ? 
"Y" : "N", 8125 qm_glbl_sts0, qm_cgm_sts); 8126 } 8127 8128 port = 2 * i + 1; 8129 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8130 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset); 8131 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset); 8132 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8133 is_idle &= is_eng_idle; 8134 8135 if (mask && !is_eng_idle) 8136 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8137 if (e) 8138 hl_engine_data_sprintf(e, nic_fmt, port, 8139 is_eng_idle ? "Y" : "N", 8140 qm_glbl_sts0, qm_cgm_sts); 8141 } 8142 } 8143 8144 if (e) 8145 hl_engine_data_sprintf(e, "\n"); 8146 8147 return is_idle; 8148 } 8149 8150 static void gaudi_hw_queues_lock(struct hl_device *hdev) 8151 __acquires(&gaudi->hw_queues_lock) 8152 { 8153 struct gaudi_device *gaudi = hdev->asic_specific; 8154 8155 spin_lock(&gaudi->hw_queues_lock); 8156 } 8157 8158 static void gaudi_hw_queues_unlock(struct hl_device *hdev) 8159 __releases(&gaudi->hw_queues_lock) 8160 { 8161 struct gaudi_device *gaudi = hdev->asic_specific; 8162 8163 spin_unlock(&gaudi->hw_queues_lock); 8164 } 8165 8166 static u32 gaudi_get_pci_id(struct hl_device *hdev) 8167 { 8168 return hdev->pdev->device; 8169 } 8170 8171 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, 8172 size_t max_size) 8173 { 8174 struct gaudi_device *gaudi = hdev->asic_specific; 8175 8176 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8177 return 0; 8178 8179 return hl_fw_get_eeprom_data(hdev, data, max_size); 8180 } 8181 8182 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) 8183 { 8184 struct gaudi_device *gaudi = hdev->asic_specific; 8185 8186 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8187 return 0; 8188 8189 return hl_fw_get_monitor_dump(hdev, data); 8190 } 8191 8192 /* 8193 * this function should be used only during initialization and/or after reset, 8194 * when there are no active users. 
8195 */ 8196 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) 8197 { 8198 u64 kernel_timeout; 8199 u32 status, offset; 8200 int rc; 8201 8202 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); 8203 8204 if (hdev->pldm) 8205 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; 8206 else 8207 kernel_timeout = HL_DEVICE_TIMEOUT_USEC; 8208 8209 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, 8210 lower_32_bits(tpc_kernel)); 8211 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, 8212 upper_32_bits(tpc_kernel)); 8213 8214 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, 8215 lower_32_bits(tpc_kernel)); 8216 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, 8217 upper_32_bits(tpc_kernel)); 8218 /* set a valid LUT pointer, content is of no significance */ 8219 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, 8220 lower_32_bits(tpc_kernel)); 8221 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, 8222 upper_32_bits(tpc_kernel)); 8223 8224 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, 8225 lower_32_bits(CFG_BASE + 8226 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); 8227 8228 WREG32(mmTPC0_CFG_TPC_CMD + offset, 8229 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | 8230 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); 8231 /* wait a bit for the engine to start executing */ 8232 usleep_range(1000, 1500); 8233 8234 /* wait until engine has finished executing */ 8235 rc = hl_poll_timeout( 8236 hdev, 8237 mmTPC0_CFG_STATUS + offset, 8238 status, 8239 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8240 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8241 1000, 8242 kernel_timeout); 8243 8244 if (rc) { 8245 dev_err(hdev->dev, 8246 "Timeout while waiting for TPC%d icache prefetch\n", 8247 tpc_id); 8248 return -EIO; 8249 } 8250 8251 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, 8252 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); 8253 8254 /* wait a bit for the engine to start executing */ 8255 usleep_range(1000, 1500); 8256 8257 /* wait until engine has finished executing */ 8258 rc = hl_poll_timeout( 8259 hdev, 8260 mmTPC0_CFG_STATUS + offset, 8261 status, 8262 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8263 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8264 1000, 8265 kernel_timeout); 8266 8267 if (rc) { 8268 dev_err(hdev->dev, 8269 "Timeout while waiting for TPC%d vector pipe\n", 8270 tpc_id); 8271 return -EIO; 8272 } 8273 8274 rc = hl_poll_timeout( 8275 hdev, 8276 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, 8277 status, 8278 (status == 0), 8279 1000, 8280 kernel_timeout); 8281 8282 if (rc) { 8283 dev_err(hdev->dev, 8284 "Timeout while waiting for TPC%d kernel to execute\n", 8285 tpc_id); 8286 return -EIO; 8287 } 8288 8289 return 0; 8290 } 8291 8292 static int gaudi_internal_cb_pool_init(struct hl_device *hdev, 8293 struct hl_ctx *ctx) 8294 { 8295 struct gaudi_device *gaudi = hdev->asic_specific; 8296 int min_alloc_order, rc, collective_cb_size; 8297 8298 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8299 return 0; 8300 8301 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 8302 HOST_SPACE_INTERNAL_CB_SZ, 8303 &hdev->internal_cb_pool_dma_addr, 8304 GFP_KERNEL | __GFP_ZERO); 8305 8306 if (!hdev->internal_cb_pool_virt_addr) 8307 return -ENOMEM; 8308 8309 collective_cb_size = sizeof(struct packet_msg_short) * 5 + 8310 sizeof(struct packet_fence); 8311 min_alloc_order = ilog2(collective_cb_size); 8312 8313 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 8314 if (!hdev->internal_cb_pool) { 8315 dev_err(hdev->dev, 8316 "Failed to 
create internal CB pool\n"); 8317 rc = -ENOMEM; 8318 goto free_internal_cb_pool; 8319 } 8320 8321 rc = gen_pool_add(hdev->internal_cb_pool, 8322 (uintptr_t) hdev->internal_cb_pool_virt_addr, 8323 HOST_SPACE_INTERNAL_CB_SZ, -1); 8324 if (rc) { 8325 dev_err(hdev->dev, 8326 "Failed to add memory to internal CB pool\n"); 8327 rc = -EFAULT; 8328 goto destroy_internal_cb_pool; 8329 } 8330 8331 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, 8332 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ, 8333 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 8334 8335 if (!hdev->internal_cb_va_base) { 8336 rc = -ENOMEM; 8337 goto destroy_internal_cb_pool; 8338 } 8339 8340 mutex_lock(&hdev->mmu_lock); 8341 8342 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8343 hdev->internal_cb_pool_dma_addr, 8344 HOST_SPACE_INTERNAL_CB_SZ); 8345 if (rc) 8346 goto unreserve_internal_cb_pool; 8347 8348 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8349 if (rc) 8350 goto unmap_internal_cb_pool; 8351 8352 mutex_unlock(&hdev->mmu_lock); 8353 8354 return 0; 8355 8356 unmap_internal_cb_pool: 8357 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8358 HOST_SPACE_INTERNAL_CB_SZ); 8359 unreserve_internal_cb_pool: 8360 mutex_unlock(&hdev->mmu_lock); 8361 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8362 HOST_SPACE_INTERNAL_CB_SZ); 8363 destroy_internal_cb_pool: 8364 gen_pool_destroy(hdev->internal_cb_pool); 8365 free_internal_cb_pool: 8366 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8367 hdev->internal_cb_pool_dma_addr); 8368 8369 return rc; 8370 } 8371 8372 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, 8373 struct hl_ctx *ctx) 8374 { 8375 struct gaudi_device *gaudi = hdev->asic_specific; 8376 8377 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8378 return; 8379 8380 mutex_lock(&hdev->mmu_lock); 8381 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8382 HOST_SPACE_INTERNAL_CB_SZ); 8383 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8384 HOST_SPACE_INTERNAL_CB_SZ); 8385 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8386 mutex_unlock(&hdev->mmu_lock); 8387 8388 gen_pool_destroy(hdev->internal_cb_pool); 8389 8390 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8391 hdev->internal_cb_pool_dma_addr); 8392 } 8393 8394 static int gaudi_ctx_init(struct hl_ctx *ctx) 8395 { 8396 int rc; 8397 8398 if (ctx->asid == HL_KERNEL_ASID_ID) 8399 return 0; 8400 8401 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); 8402 if (rc) 8403 return rc; 8404 8405 rc = gaudi_restore_user_registers(ctx->hdev); 8406 if (rc) 8407 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8408 8409 return rc; 8410 } 8411 8412 static void gaudi_ctx_fini(struct hl_ctx *ctx) 8413 { 8414 if (ctx->asid == HL_KERNEL_ASID_ID) 8415 return; 8416 8417 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8418 } 8419 8420 static int gaudi_pre_schedule_cs(struct hl_cs *cs) 8421 { 8422 return 0; 8423 } 8424 8425 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 8426 { 8427 return gaudi_cq_assignment[cq_idx]; 8428 } 8429 8430 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) 8431 { 8432 return sizeof(struct packet_msg_short) + 8433 sizeof(struct packet_msg_prot) * 2; 8434 } 8435 8436 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) 8437 { 8438 return sizeof(struct packet_msg_short) * 4 + 8439 sizeof(struct packet_fence) + 8440 sizeof(struct packet_msg_prot) * 2; 8441 } 8442 8443 
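
/*
 * Editor's note (illustrative sketch; the helper below is hypothetical and
 * unused by the driver): the wait CB size returned above matches the packets
 * that gaudi_gen_wait_cb() emits further down - three MSG_SHORT packets that
 * set the monitor payload address/data (gaudi_add_mon_pkts), one MSG_SHORT
 * that arms the monitor (gaudi_add_arm_monitor_pkt) and one FENCE packet
 * (gaudi_add_fence_pkt). The two MSG_PROT packets are assumed here to be
 * reserved for the completion update appended by the common queue code.
 */
static inline u32 gaudi_wait_cb_size_breakdown_sketch(void)
{
	u32 mon_cfg = 3 * sizeof(struct packet_msg_short);	/* ADDRL, ADDRH, DATA */
	u32 mon_arm = sizeof(struct packet_msg_short);		/* arm the monitor */
	u32 fence = sizeof(struct packet_fence);		/* wait on the fence */
	u32 prot = 2 * sizeof(struct packet_msg_prot);		/* completion, assumed */

	return mon_cfg + mon_arm + fence + prot;
}
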
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) 8444 { 8445 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); 8446 } 8447 8448 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 8449 u32 size, bool eb) 8450 { 8451 struct hl_cb *cb = (struct hl_cb *) data; 8452 struct packet_msg_short *pkt; 8453 u32 value, ctl, pkt_size = sizeof(*pkt); 8454 8455 pkt = cb->kernel_address + size; 8456 memset(pkt, 0, pkt_size); 8457 8458 /* Inc by 1, Mode ADD */ 8459 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 8460 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 8461 8462 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 8463 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8464 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ 8465 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8466 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb); 8467 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8468 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8469 8470 pkt->value = cpu_to_le32(value); 8471 pkt->ctl = cpu_to_le32(ctl); 8472 8473 return size + pkt_size; 8474 } 8475 8476 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, 8477 u16 addr) 8478 { 8479 u32 ctl, pkt_size = sizeof(*pkt); 8480 8481 memset(pkt, 0, pkt_size); 8482 8483 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); 8484 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8485 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8486 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8487 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8488 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */ 8489 8490 pkt->value = cpu_to_le32(value); 8491 pkt->ctl = cpu_to_le32(ctl); 8492 8493 return pkt_size; 8494 } 8495 8496 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev, 8497 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask, 8498 u16 sob_val, u16 mon_id) 8499 { 8500 u64 monitor_base; 8501 u32 ctl, value, pkt_size = sizeof(*pkt); 8502 u16 msg_addr_offset; 8503 u8 mask; 8504 8505 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 8506 dev_err(hdev->dev, 8507 "sob_base %u (mask %#x) is not valid\n", 8508 sob_base, sob_mask); 8509 return 0; 8510 } 8511 8512 /* 8513 * monitor_base should be the content of the base0 address registers, 8514 * so it will be added to the msg short offsets 8515 */ 8516 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8517 8518 msg_addr_offset = 8519 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - 8520 monitor_base; 8521 8522 memset(pkt, 0, pkt_size); 8523 8524 /* Monitor config packet: bind the monitor to a sync object */ 8525 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 8526 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 8527 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, 8528 0); /* GREATER OR EQUAL*/ 8529 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); 8530 8531 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset); 8532 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8533 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8534 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8535 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8536 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8537 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8538 8539 pkt->value = 
cpu_to_le32(value); 8540 pkt->ctl = cpu_to_le32(ctl); 8541 8542 return pkt_size; 8543 } 8544 8545 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) 8546 { 8547 u32 ctl, cfg, pkt_size = sizeof(*pkt); 8548 8549 memset(pkt, 0, pkt_size); 8550 8551 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 8552 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 8553 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2); 8554 8555 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 8556 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8557 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8558 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8559 8560 pkt->cfg = cpu_to_le32(cfg); 8561 pkt->ctl = cpu_to_le32(ctl); 8562 8563 return pkt_size; 8564 } 8565 8566 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr) 8567 { 8568 u32 offset, nic_index; 8569 8570 switch (queue_id) { 8571 case GAUDI_QUEUE_ID_DMA_0_0: 8572 offset = mmDMA0_QM_CP_FENCE2_RDATA_0; 8573 break; 8574 case GAUDI_QUEUE_ID_DMA_0_1: 8575 offset = mmDMA0_QM_CP_FENCE2_RDATA_1; 8576 break; 8577 case GAUDI_QUEUE_ID_DMA_0_2: 8578 offset = mmDMA0_QM_CP_FENCE2_RDATA_2; 8579 break; 8580 case GAUDI_QUEUE_ID_DMA_0_3: 8581 offset = mmDMA0_QM_CP_FENCE2_RDATA_3; 8582 break; 8583 case GAUDI_QUEUE_ID_DMA_1_0: 8584 offset = mmDMA1_QM_CP_FENCE2_RDATA_0; 8585 break; 8586 case GAUDI_QUEUE_ID_DMA_1_1: 8587 offset = mmDMA1_QM_CP_FENCE2_RDATA_1; 8588 break; 8589 case GAUDI_QUEUE_ID_DMA_1_2: 8590 offset = mmDMA1_QM_CP_FENCE2_RDATA_2; 8591 break; 8592 case GAUDI_QUEUE_ID_DMA_1_3: 8593 offset = mmDMA1_QM_CP_FENCE2_RDATA_3; 8594 break; 8595 case GAUDI_QUEUE_ID_DMA_5_0: 8596 offset = mmDMA5_QM_CP_FENCE2_RDATA_0; 8597 break; 8598 case GAUDI_QUEUE_ID_DMA_5_1: 8599 offset = mmDMA5_QM_CP_FENCE2_RDATA_1; 8600 break; 8601 case GAUDI_QUEUE_ID_DMA_5_2: 8602 offset = mmDMA5_QM_CP_FENCE2_RDATA_2; 8603 break; 8604 case GAUDI_QUEUE_ID_DMA_5_3: 8605 offset = mmDMA5_QM_CP_FENCE2_RDATA_3; 8606 break; 8607 case GAUDI_QUEUE_ID_TPC_7_0: 8608 offset = mmTPC7_QM_CP_FENCE2_RDATA_0; 8609 break; 8610 case GAUDI_QUEUE_ID_TPC_7_1: 8611 offset = mmTPC7_QM_CP_FENCE2_RDATA_1; 8612 break; 8613 case GAUDI_QUEUE_ID_TPC_7_2: 8614 offset = mmTPC7_QM_CP_FENCE2_RDATA_2; 8615 break; 8616 case GAUDI_QUEUE_ID_TPC_7_3: 8617 offset = mmTPC7_QM_CP_FENCE2_RDATA_3; 8618 break; 8619 case GAUDI_QUEUE_ID_NIC_0_0: 8620 case GAUDI_QUEUE_ID_NIC_1_0: 8621 case GAUDI_QUEUE_ID_NIC_2_0: 8622 case GAUDI_QUEUE_ID_NIC_3_0: 8623 case GAUDI_QUEUE_ID_NIC_4_0: 8624 case GAUDI_QUEUE_ID_NIC_5_0: 8625 case GAUDI_QUEUE_ID_NIC_6_0: 8626 case GAUDI_QUEUE_ID_NIC_7_0: 8627 case GAUDI_QUEUE_ID_NIC_8_0: 8628 case GAUDI_QUEUE_ID_NIC_9_0: 8629 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2; 8630 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 + 8631 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8632 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8633 break; 8634 case GAUDI_QUEUE_ID_NIC_0_1: 8635 case GAUDI_QUEUE_ID_NIC_1_1: 8636 case GAUDI_QUEUE_ID_NIC_2_1: 8637 case GAUDI_QUEUE_ID_NIC_3_1: 8638 case GAUDI_QUEUE_ID_NIC_4_1: 8639 case GAUDI_QUEUE_ID_NIC_5_1: 8640 case GAUDI_QUEUE_ID_NIC_6_1: 8641 case GAUDI_QUEUE_ID_NIC_7_1: 8642 case GAUDI_QUEUE_ID_NIC_8_1: 8643 case GAUDI_QUEUE_ID_NIC_9_1: 8644 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2; 8645 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 + 8646 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8647 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8648 break; 8649 case GAUDI_QUEUE_ID_NIC_0_2: 8650 case GAUDI_QUEUE_ID_NIC_1_2: 8651 case GAUDI_QUEUE_ID_NIC_2_2: 8652 case 
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}

static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
		monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
		monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
		monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
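/*
 * Layout of the generated wait CB (appended at prop->size): three MSG_SHORT
 * packets that configure the monitor (low/high payload address pointing at
 * the queue's fence register, plus a payload value of 1), one MSG_SHORT that
 * arms the monitor on the requested SOB group/value, and a final FENCE packet
 * that blocks the CP until fence ID 2 reaches its target value of 1.
 */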
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
			hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}
static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}

static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}
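/*
 * Worked example (illustrative values): for group_id = 3 and an arm mask of
 * 0b11111010, bits 0 and 2 are cleared, so the monitored sync objects are
 * 3 * 8 + 0 = 24 and 3 * 8 + 2 = 26, and gaudi_fill_sobs_from_mon() renders
 * them as "24, 26".
 */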
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
					struct hl_device *hdev,
					struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}


static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}


static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
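/*
 * Register the Gaudi state-dump specifics with the common layer: hash tables
 * that map sync object and monitor IDs to their symbolic names, the property
 * array, the sync manager names and the callbacks defined above.
 */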
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}

static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}
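/*
 * Illustrative usage (sketch with hypothetical call sites): the common
 * habanalabs code selects this ops table during device initialization and
 * then dispatches through it, e.g.:
 *
 *	gaudi_set_asic_funcs(hdev);
 *	sob_addr = hdev->asic_funcs->get_sob_addr(hdev, sob_id);
 *	size = hdev->asic_funcs->gen_signal_cb(hdev, cb, sob_id, 0, true);
 */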