// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: the driver needs to parse the CB, but WREG should be
 *                      allowed because of TDMA (tensor DMA). Hence, WREG is
 *                      never secured.
 *
 * When the driver needs to use DMA, it checks that Gaudi is idle, sets DMA
 * channel 0 to be secured, executes the DMA and changes it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9

#define GAUDI_MAX_STRING_LEN		20

#define GAUDI_CB_POOL_CB_CNT		512
#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
GAUDI_QUEUE_ID_DMA_0_1, 109 GAUDI_QUEUE_ID_DMA_0_2, 110 GAUDI_QUEUE_ID_DMA_0_3, 111 GAUDI_QUEUE_ID_DMA_1_0, 112 GAUDI_QUEUE_ID_DMA_1_1, 113 GAUDI_QUEUE_ID_DMA_1_2, 114 GAUDI_QUEUE_ID_DMA_1_3 115 }; 116 117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { 118 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", 119 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", 120 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3", 121 "gaudi cpu eq" 122 }; 123 124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { 125 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0, 126 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1, 127 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2, 128 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3, 129 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4, 130 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5, 131 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6, 132 [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7 133 }; 134 135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = { 136 [0] = GAUDI_QUEUE_ID_DMA_0_0, 137 [1] = GAUDI_QUEUE_ID_DMA_0_1, 138 [2] = GAUDI_QUEUE_ID_DMA_0_2, 139 [3] = GAUDI_QUEUE_ID_DMA_0_3, 140 [4] = GAUDI_QUEUE_ID_DMA_1_0, 141 [5] = GAUDI_QUEUE_ID_DMA_1_1, 142 [6] = GAUDI_QUEUE_ID_DMA_1_2, 143 [7] = GAUDI_QUEUE_ID_DMA_1_3, 144 }; 145 146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = { 147 [PACKET_WREG_32] = sizeof(struct packet_wreg32), 148 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk), 149 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long), 150 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short), 151 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma), 152 [PACKET_REPEAT] = sizeof(struct packet_repeat), 153 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot), 154 [PACKET_FENCE] = sizeof(struct packet_fence), 155 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma), 156 [PACKET_NOP] = sizeof(struct packet_nop), 157 [PACKET_STOP] = sizeof(struct packet_stop), 158 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point), 159 [PACKET_WAIT] = sizeof(struct packet_wait), 160 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe) 161 }; 162 163 static inline bool validate_packet_id(enum packet_id id) 164 { 165 switch (id) { 166 case PACKET_WREG_32: 167 case PACKET_WREG_BULK: 168 case PACKET_MSG_LONG: 169 case PACKET_MSG_SHORT: 170 case PACKET_CP_DMA: 171 case PACKET_REPEAT: 172 case PACKET_MSG_PROT: 173 case PACKET_FENCE: 174 case PACKET_LIN_DMA: 175 case PACKET_NOP: 176 case PACKET_STOP: 177 case PACKET_ARB_POINT: 178 case PACKET_WAIT: 179 case PACKET_LOAD_AND_EXE: 180 return true; 181 default: 182 return false; 183 } 184 } 185 186 static const char * const 187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = { 188 "tpc_address_exceed_slm", 189 "tpc_div_by_0", 190 "tpc_spu_mac_overflow", 191 "tpc_spu_addsub_overflow", 192 "tpc_spu_abs_overflow", 193 "tpc_spu_fp_dst_nan_inf", 194 "tpc_spu_fp_dst_denorm", 195 "tpc_vpu_mac_overflow", 196 "tpc_vpu_addsub_overflow", 197 "tpc_vpu_abs_overflow", 198 "tpc_vpu_fp_dst_nan_inf", 199 "tpc_vpu_fp_dst_denorm", 200 "tpc_assertions", 201 "tpc_illegal_instruction", 202 "tpc_pc_wrap_around", 203 "tpc_qm_sw_err", 204 "tpc_hbw_rresp_err", 205 "tpc_hbw_bresp_err", 206 "tpc_lbw_rresp_err", 207 "tpc_lbw_bresp_err" 208 }; 209 210 static const char * const 211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = { 212 "PQ AXI HBW error", 213 "CQ AXI HBW error", 214 "CP AXI HBW error", 215 "CP error due to undefined OPCODE", 216 "CP encountered STOP OPCODE", 217 "CP AXI LBW error", 218 "CP WRREG32 
or WRBULK returned error", 219 "N/A", 220 "FENCE 0 inc over max value and clipped", 221 "FENCE 1 inc over max value and clipped", 222 "FENCE 2 inc over max value and clipped", 223 "FENCE 3 inc over max value and clipped", 224 "FENCE 0 dec under min value and clipped", 225 "FENCE 1 dec under min value and clipped", 226 "FENCE 2 dec under min value and clipped", 227 "FENCE 3 dec under min value and clipped" 228 }; 229 230 static const char * const 231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { 232 "Choice push while full error", 233 "Choice Q watchdog error", 234 "MSG AXI LBW returned with error" 235 }; 236 237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { 238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ 239 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ 240 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */ 241 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */ 242 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */ 243 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */ 244 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */ 245 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */ 246 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */ 247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */ 248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */ 249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */ 250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */ 251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */ 252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */ 253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */ 254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */ 255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */ 256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */ 257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */ 258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */ 259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */ 260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */ 261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */ 262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */ 263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */ 264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */ 265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */ 266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */ 267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */ 268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */ 269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */ 270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */ 271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ 272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ 273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ 274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ 275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */ 276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ 277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ 278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ 279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ 280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ 281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ 282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ 283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ 284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ 285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ 286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ 287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ 288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ 289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */ 290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ 291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ 292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ 293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ 294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ 295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ 296 
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ 297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ 298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ 299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */ 300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */ 301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */ 302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */ 303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */ 304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */ 305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */ 306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */ 307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */ 308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */ 309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */ 310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */ 311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */ 312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */ 313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */ 314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */ 315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */ 316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */ 317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */ 318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */ 319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */ 320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */ 321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */ 322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */ 323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */ 324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */ 325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */ 326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */ 327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */ 328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */ 329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */ 330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */ 331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */ 332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */ 333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */ 334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */ 335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */ 336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */ 337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */ 338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */ 339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */ 340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */ 341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */ 342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */ 343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */ 344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */ 345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */ 346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */ 347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */ 348 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */ 349 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */ 350 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */ 351 }; 352 353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = { 354 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" }, 355 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" }, 356 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" }, 357 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" }, 358 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" }, 359 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" }, 360 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" }, 361 { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" }, 362 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" }, 363 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" }, 364 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" }, 365 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" }, 366 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" }, 367 { .id = 13, .name = 
"SYNC_OBJ_ENGINE_SEM_TPC_3" }, 368 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" }, 369 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" }, 370 { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" }, 371 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" }, 372 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" }, 373 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" }, 374 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" }, 375 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" }, 376 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" }, 377 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" }, 378 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" }, 379 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" }, 380 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" }, 381 }; 382 383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = { 384 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" }, 385 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" }, 386 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" }, 387 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" }, 388 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" }, 389 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" }, 390 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" }, 391 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" }, 392 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" }, 393 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" }, 394 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" }, 395 }; 396 397 static s64 gaudi_state_dump_specs_props[] = { 398 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, 399 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL, 400 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK, 401 [SP_MON_OBJ_WR_ADDR_LOW] = 402 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0, 403 [SP_MON_OBJ_WR_ADDR_HIGH] = 404 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0, 405 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0, 406 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0, 407 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0, 408 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK, 409 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0, 410 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR, 411 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0, 412 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0, 413 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL, 414 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0, 415 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0, 416 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO, 417 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0, 418 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES, 419 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES, 420 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES, 421 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES, 422 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES, 423 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS, 424 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES, 425 [SP_FENCE0_CNT_OFFSET] = 426 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0, 427 [SP_FENCE0_RDATA_OFFSET] = 428 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0, 429 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0, 430 [SP_NUM_CORES] = 1, 431 }; 432 433 static const int gaudi_queue_id_to_engine_id[] = { 434 [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0, 435 [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1, 436 [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE, 437 [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2, 
438 [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3, 439 [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4, 440 [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5, 441 [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6, 442 [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7, 443 [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0, 444 [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2, 445 [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0, 446 [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1, 447 [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2, 448 [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3, 449 [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4, 450 [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5, 451 [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6, 452 [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7, 453 [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0, 454 [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1, 455 [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2, 456 [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3, 457 [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4, 458 [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5, 459 [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6, 460 [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7, 461 [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8, 462 [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9, 463 }; 464 465 /* The order here is opposite to the order of the indexing in the h/w. 466 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc. 
467 */ 468 static const char * const gaudi_sync_manager_names[] = { 469 "SYNC_MGR_E_N", 470 "SYNC_MGR_W_N", 471 "SYNC_MGR_E_S", 472 "SYNC_MGR_W_S", 473 NULL 474 }; 475 476 struct ecc_info_extract_params { 477 u64 block_address; 478 u32 num_memories; 479 bool derr; 480 }; 481 482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, 483 u64 phys_addr); 484 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 485 struct hl_cs_job *job); 486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 487 u32 size, u64 val); 488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 489 u32 num_regs, u32 val); 490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 491 u32 tpc_id); 492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev); 493 static int gaudi_cpucp_info_get(struct hl_device *hdev); 494 static void gaudi_disable_clock_gating(struct hl_device *hdev); 495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); 496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 497 u32 size, bool eb); 498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev, 499 struct hl_gen_wait_properties *prop); 500 static inline enum hl_collective_mode 501 get_collective_mode(struct hl_device *hdev, u32 queue_id) 502 { 503 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT) 504 return HL_COLLECTIVE_MASTER; 505 506 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 && 507 queue_id <= GAUDI_QUEUE_ID_DMA_5_3) 508 return HL_COLLECTIVE_SLAVE; 509 510 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 && 511 queue_id <= GAUDI_QUEUE_ID_TPC_7_3) 512 return HL_COLLECTIVE_SLAVE; 513 514 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 && 515 queue_id <= GAUDI_QUEUE_ID_NIC_9_3) 516 return HL_COLLECTIVE_SLAVE; 517 518 return HL_COLLECTIVE_NOT_SUPPORTED; 519 } 520 521 static inline void set_default_power_values(struct hl_device *hdev) 522 { 523 struct asic_fixed_properties *prop = &hdev->asic_prop; 524 525 if (hdev->card_type == cpucp_card_type_pmc) { 526 prop->max_power_default = MAX_POWER_DEFAULT_PMC; 527 528 if (prop->fw_security_enabled) 529 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC; 530 else 531 prop->dc_power_default = DC_POWER_DEFAULT_PMC; 532 } else { 533 prop->max_power_default = MAX_POWER_DEFAULT_PCI; 534 prop->dc_power_default = DC_POWER_DEFAULT_PCI; 535 } 536 } 537 538 static int gaudi_set_fixed_properties(struct hl_device *hdev) 539 { 540 struct asic_fixed_properties *prop = &hdev->asic_prop; 541 u32 num_sync_stream_queues = 0; 542 int i; 543 544 prop->max_queues = GAUDI_QUEUE_ID_SIZE; 545 prop->hw_queues_props = kcalloc(prop->max_queues, 546 sizeof(struct hw_queue_properties), 547 GFP_KERNEL); 548 549 if (!prop->hw_queues_props) 550 return -ENOMEM; 551 552 for (i = 0 ; i < prop->max_queues ; i++) { 553 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) { 554 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; 555 prop->hw_queues_props[i].driver_only = 0; 556 prop->hw_queues_props[i].supports_sync_stream = 1; 557 prop->hw_queues_props[i].cb_alloc_flags = 558 CB_ALLOC_KERNEL; 559 num_sync_stream_queues++; 560 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) { 561 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; 562 prop->hw_queues_props[i].driver_only = 1; 563 prop->hw_queues_props[i].supports_sync_stream = 0; 564 prop->hw_queues_props[i].cb_alloc_flags = 565 CB_ALLOC_KERNEL; 566 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) { 567 prop->hw_queues_props[i].type = QUEUE_TYPE_INT; 568 prop->hw_queues_props[i].driver_only = 
0; 569 prop->hw_queues_props[i].supports_sync_stream = 0; 570 prop->hw_queues_props[i].cb_alloc_flags = 571 CB_ALLOC_USER; 572 573 } 574 prop->hw_queues_props[i].collective_mode = 575 get_collective_mode(hdev, i); 576 } 577 578 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE; 579 prop->cfg_base_address = CFG_BASE; 580 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE; 581 prop->host_base_address = HOST_PHYS_BASE; 582 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE; 583 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; 584 prop->completion_mode = HL_COMPLETION_MODE_JOB; 585 prop->collective_first_sob = 0; 586 prop->collective_first_mon = 0; 587 588 /* 2 SOBs per internal queue stream are reserved for collective */ 589 prop->sync_stream_first_sob = 590 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR) 591 * QMAN_STREAMS * HL_RSVD_SOBS; 592 593 /* 1 monitor per internal queue stream are reserved for collective 594 * 2 monitors per external queue stream are reserved for collective 595 */ 596 prop->sync_stream_first_mon = 597 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) + 598 (NUMBER_OF_EXT_HW_QUEUES * 2); 599 600 prop->dram_base_address = DRAM_PHYS_BASE; 601 prop->dram_size = GAUDI_HBM_SIZE_32GB; 602 prop->dram_end_address = prop->dram_base_address + prop->dram_size; 603 prop->dram_user_base_address = DRAM_BASE_ADDR_USER; 604 605 prop->sram_base_address = SRAM_BASE_ADDR; 606 prop->sram_size = SRAM_SIZE; 607 prop->sram_end_address = prop->sram_base_address + prop->sram_size; 608 prop->sram_user_base_address = 609 prop->sram_base_address + SRAM_USER_BASE_OFFSET; 610 611 prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR; 612 prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE; 613 614 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR; 615 if (hdev->pldm) 616 prop->mmu_pgt_size = 0x800000; /* 8MB */ 617 else 618 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; 619 prop->mmu_pte_size = HL_PTE_SIZE; 620 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; 621 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; 622 prop->dram_page_size = PAGE_SIZE_2MB; 623 prop->device_mem_alloc_default_page_size = prop->dram_page_size; 624 prop->dram_supports_virtual_memory = false; 625 626 prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT; 627 prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT; 628 prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT; 629 prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT; 630 prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT; 631 prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK; 632 prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK; 633 prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK; 634 prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK; 635 prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK; 636 prop->pmmu.start_addr = VA_HOST_SPACE_START; 637 prop->pmmu.end_addr = 638 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; 639 prop->pmmu.page_size = PAGE_SIZE_4KB; 640 prop->pmmu.num_hops = MMU_ARCH_5_HOPS; 641 prop->pmmu.last_mask = LAST_MASK; 642 /* TODO: will be duplicated until implementing per-MMU props */ 643 prop->pmmu.hop_table_size = prop->mmu_hop_table_size; 644 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 645 646 /* PMMU and HPMMU are the same except of page size */ 647 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 648 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 649 650 /* shifts and masks are the same in PMMU and DMMU */ 651 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu)); 652 
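	/* Note: the host VA range is split evenly between the two MMU property
	 * sets. The PMMU covers the lower half (4KB pages, 2MB for pmmu_huge),
	 * while the DMMU properties below cover the upper half with 2MB pages.
	 */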
prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2); 653 prop->dmmu.end_addr = VA_HOST_SPACE_END; 654 prop->dmmu.page_size = PAGE_SIZE_2MB; 655 656 prop->cfg_size = CFG_SIZE; 657 prop->max_asid = MAX_ASID; 658 prop->num_of_events = GAUDI_EVENT_SIZE; 659 prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE; 660 prop->tpc_enabled_mask = TPC_ENABLED_MASK; 661 662 set_default_power_values(hdev); 663 664 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT; 665 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; 666 667 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; 668 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 669 670 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 671 CARD_NAME_MAX_LEN); 672 673 prop->max_pending_cs = GAUDI_MAX_PENDING_CS; 674 675 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] = 676 prop->sync_stream_first_sob + 677 (num_sync_stream_queues * HL_RSVD_SOBS); 678 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] = 679 prop->sync_stream_first_mon + 680 (num_sync_stream_queues * HL_RSVD_MONS); 681 682 prop->first_available_user_interrupt = USHRT_MAX; 683 prop->tpc_interrupt_id = USHRT_MAX; 684 685 for (i = 0 ; i < HL_MAX_DCORES ; i++) 686 prop->first_available_cq[i] = USHRT_MAX; 687 688 prop->fw_cpu_boot_dev_sts0_valid = false; 689 prop->fw_cpu_boot_dev_sts1_valid = false; 690 prop->hard_reset_done_by_fw = false; 691 prop->gic_interrupts_enable = true; 692 693 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 694 695 prop->clk_pll_index = HL_GAUDI_MME_PLL; 696 prop->max_freq_value = GAUDI_MAX_CLK_FREQ; 697 698 prop->use_get_power_for_reset_history = true; 699 700 prop->configurable_stop_on_err = true; 701 702 prop->set_max_power_on_device_init = true; 703 704 prop->dma_mask = 48; 705 706 prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL; 707 708 return 0; 709 } 710 711 static int gaudi_pci_bars_map(struct hl_device *hdev) 712 { 713 static const char * const name[] = {"SRAM", "CFG", "HBM"}; 714 bool is_wc[3] = {false, false, true}; 715 int rc; 716 717 rc = hl_pci_bars_map(hdev, name, is_wc); 718 if (rc) 719 return rc; 720 721 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] + 722 (CFG_BASE - SPI_FLASH_BASE_ADDR); 723 724 return 0; 725 } 726 727 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 728 { 729 struct gaudi_device *gaudi = hdev->asic_specific; 730 struct hl_inbound_pci_region pci_region; 731 u64 old_addr = addr; 732 int rc; 733 734 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr)) 735 return old_addr; 736 737 if (hdev->asic_prop.iatu_done_by_fw) 738 return U64_MAX; 739 740 /* Inbound Region 2 - Bar 4 - Point to HBM */ 741 pci_region.mode = PCI_BAR_MATCH_MODE; 742 pci_region.bar = HBM_BAR_ID; 743 pci_region.addr = addr; 744 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 745 if (rc) 746 return U64_MAX; 747 748 if (gaudi) { 749 old_addr = gaudi->hbm_bar_cur_addr; 750 gaudi->hbm_bar_cur_addr = addr; 751 } 752 753 return old_addr; 754 } 755 756 static int gaudi_init_iatu(struct hl_device *hdev) 757 { 758 struct hl_inbound_pci_region inbound_region; 759 struct hl_outbound_pci_region outbound_region; 760 int rc; 761 762 if (hdev->asic_prop.iatu_done_by_fw) 763 return 0; 764 765 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */ 766 inbound_region.mode = PCI_BAR_MATCH_MODE; 767 inbound_region.bar = SRAM_BAR_ID; 768 inbound_region.addr = SRAM_BASE_ADDR; 769 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 770 if (rc) 771 goto done; 772 773 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */ 774 inbound_region.mode = 
PCI_BAR_MATCH_MODE; 775 inbound_region.bar = CFG_BAR_ID; 776 inbound_region.addr = SPI_FLASH_BASE_ADDR; 777 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 778 if (rc) 779 goto done; 780 781 /* Inbound Region 2 - Bar 4 - Point to HBM */ 782 inbound_region.mode = PCI_BAR_MATCH_MODE; 783 inbound_region.bar = HBM_BAR_ID; 784 inbound_region.addr = DRAM_PHYS_BASE; 785 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 786 if (rc) 787 goto done; 788 789 /* Outbound Region 0 - Point to Host */ 790 outbound_region.addr = HOST_PHYS_BASE; 791 outbound_region.size = HOST_PHYS_SIZE; 792 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 793 794 done: 795 return rc; 796 } 797 798 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev) 799 { 800 return RREG32(mmHW_STATE); 801 } 802 803 static int gaudi_early_init(struct hl_device *hdev) 804 { 805 struct asic_fixed_properties *prop = &hdev->asic_prop; 806 struct pci_dev *pdev = hdev->pdev; 807 resource_size_t pci_bar_size; 808 u32 fw_boot_status; 809 int rc; 810 811 rc = gaudi_set_fixed_properties(hdev); 812 if (rc) { 813 dev_err(hdev->dev, "Failed setting fixed properties\n"); 814 return rc; 815 } 816 817 /* Check BAR sizes */ 818 pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID); 819 820 if (pci_bar_size != SRAM_BAR_SIZE) { 821 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 822 SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE); 823 rc = -ENODEV; 824 goto free_queue_props; 825 } 826 827 pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID); 828 829 if (pci_bar_size != CFG_BAR_SIZE) { 830 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 831 CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE); 832 rc = -ENODEV; 833 goto free_queue_props; 834 } 835 836 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); 837 hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID); 838 839 /* If FW security is enabled at this point it means no access to ELBI */ 840 if (hdev->asic_prop.fw_security_enabled) { 841 hdev->asic_prop.iatu_done_by_fw = true; 842 843 /* 844 * GIC-security-bit can ONLY be set by CPUCP, so in this stage 845 * decision can only be taken based on PCI ID security. 846 */ 847 hdev->asic_prop.gic_interrupts_enable = false; 848 goto pci_init; 849 } 850 851 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, 852 &fw_boot_status); 853 if (rc) 854 goto free_queue_props; 855 856 /* Check whether FW is configuring iATU */ 857 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) && 858 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN)) 859 hdev->asic_prop.iatu_done_by_fw = true; 860 861 pci_init: 862 rc = hl_pci_init(hdev); 863 if (rc) 864 goto free_queue_props; 865 866 /* Before continuing in the initialization, we need to read the preboot 867 * version to determine whether we run with a security-enabled firmware 868 */ 869 rc = hl_fw_read_preboot_status(hdev); 870 if (rc) { 871 if (hdev->reset_on_preboot_fail) 872 /* we are already on failure flow, so don't check if hw_fini fails. 
*/ 873 hdev->asic_funcs->hw_fini(hdev, true, false); 874 goto pci_fini; 875 } 876 877 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { 878 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); 879 rc = hdev->asic_funcs->hw_fini(hdev, true, false); 880 if (rc) { 881 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc); 882 goto pci_fini; 883 } 884 } 885 886 return 0; 887 888 pci_fini: 889 hl_pci_fini(hdev); 890 free_queue_props: 891 kfree(hdev->asic_prop.hw_queues_props); 892 return rc; 893 } 894 895 static int gaudi_early_fini(struct hl_device *hdev) 896 { 897 kfree(hdev->asic_prop.hw_queues_props); 898 hl_pci_fini(hdev); 899 900 return 0; 901 } 902 903 /** 904 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values 905 * 906 * @hdev: pointer to hl_device structure 907 * 908 */ 909 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) 910 { 911 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; 912 struct asic_fixed_properties *prop = &hdev->asic_prop; 913 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; 914 int rc; 915 916 if ((hdev->fw_components & FW_TYPE_LINUX) && 917 (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) { 918 struct gaudi_device *gaudi = hdev->asic_specific; 919 920 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 921 return 0; 922 923 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr); 924 925 if (rc) 926 return rc; 927 928 freq = pll_freq_arr[2]; 929 } else { 930 /* Backward compatibility */ 931 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2); 932 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2); 933 nr = RREG32(mmPSOC_CPU_PLL_NR); 934 nf = RREG32(mmPSOC_CPU_PLL_NF); 935 od = RREG32(mmPSOC_CPU_PLL_OD); 936 937 if (div_sel == DIV_SEL_REF_CLK || 938 div_sel == DIV_SEL_DIVIDED_REF) { 939 if (div_sel == DIV_SEL_REF_CLK) 940 freq = PLL_REF_CLK; 941 else 942 freq = PLL_REF_CLK / (div_fctr + 1); 943 } else if (div_sel == DIV_SEL_PLL_CLK || 944 div_sel == DIV_SEL_DIVIDED_PLL) { 945 pll_clk = PLL_REF_CLK * (nf + 1) / 946 ((nr + 1) * (od + 1)); 947 if (div_sel == DIV_SEL_PLL_CLK) 948 freq = pll_clk; 949 else 950 freq = pll_clk / (div_fctr + 1); 951 } else { 952 dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel); 953 freq = 0; 954 } 955 } 956 957 prop->psoc_timestamp_frequency = freq; 958 prop->psoc_pci_pll_nr = nr; 959 prop->psoc_pci_pll_nf = nf; 960 prop->psoc_pci_pll_od = od; 961 prop->psoc_pci_pll_div_factor = div_fctr; 962 963 return 0; 964 } 965 966 static int _gaudi_init_tpc_mem(struct hl_device *hdev, 967 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size) 968 { 969 struct asic_fixed_properties *prop = &hdev->asic_prop; 970 struct packet_lin_dma *init_tpc_mem_pkt; 971 struct hl_cs_job *job; 972 struct hl_cb *cb; 973 u64 dst_addr; 974 u32 cb_size, ctl; 975 u8 tpc_id; 976 int rc; 977 978 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 979 if (!cb) 980 return -EFAULT; 981 982 init_tpc_mem_pkt = cb->kernel_address; 983 cb_size = sizeof(*init_tpc_mem_pkt); 984 memset(init_tpc_mem_pkt, 0, cb_size); 985 986 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size); 987 988 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 989 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 990 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 991 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 992 993 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl); 994 995 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr); 996 997 /* TPC_CMD is configured with I$ prefetch enabled, so address 
should be aligned to 8KB */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
			round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
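	/* The single reduction engine queue (DMA5 or TPC7) gets the SOB that
	 * follows the NIC SOBs in the group, i.e. sob_id + NIC_NUMBER_OF_ENGINES.
	 */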
q->sync_stream_prop.collective_sob_id = 1117 sob_id + NIC_NUMBER_OF_ENGINES; 1118 1119 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream; 1120 q = &hdev->kernel_queues[queue_id]; 1121 q->sync_stream_prop.collective_sob_id = 1122 sob_id + NIC_NUMBER_OF_ENGINES; 1123 } 1124 1125 static void gaudi_sob_group_hw_reset(struct kref *ref) 1126 { 1127 struct gaudi_hw_sob_group *hw_sob_group = 1128 container_of(ref, struct gaudi_hw_sob_group, kref); 1129 struct hl_device *hdev = hw_sob_group->hdev; 1130 int i; 1131 1132 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++) 1133 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 1134 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0); 1135 1136 kref_init(&hw_sob_group->kref); 1137 } 1138 1139 static void gaudi_sob_group_reset_error(struct kref *ref) 1140 { 1141 struct gaudi_hw_sob_group *hw_sob_group = 1142 container_of(ref, struct gaudi_hw_sob_group, kref); 1143 struct hl_device *hdev = hw_sob_group->hdev; 1144 1145 dev_crit(hdev->dev, 1146 "SOB release shouldn't be called here, base_sob_id: %d\n", 1147 hw_sob_group->base_sob_id); 1148 } 1149 1150 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi) 1151 { 1152 struct gaudi_collective_properties *prop; 1153 int i; 1154 1155 prop = &gaudi->collective_props; 1156 1157 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask)); 1158 1159 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) 1160 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i)) 1161 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |= 1162 BIT(i % HL_MAX_SOBS_PER_MONITOR); 1163 /* Set collective engine bit */ 1164 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |= 1165 BIT(i % HL_MAX_SOBS_PER_MONITOR); 1166 } 1167 1168 static int gaudi_collective_init(struct hl_device *hdev) 1169 { 1170 u32 i, sob_id, reserved_sobs_per_group; 1171 struct gaudi_collective_properties *prop; 1172 struct gaudi_device *gaudi; 1173 1174 gaudi = hdev->asic_specific; 1175 prop = &gaudi->collective_props; 1176 sob_id = hdev->asic_prop.collective_first_sob; 1177 1178 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */ 1179 reserved_sobs_per_group = 1180 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR); 1181 1182 /* Init SOB groups */ 1183 for (i = 0 ; i < NUM_SOB_GROUPS; i++) { 1184 prop->hw_sob_group[i].hdev = hdev; 1185 prop->hw_sob_group[i].base_sob_id = sob_id; 1186 sob_id += reserved_sobs_per_group; 1187 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref); 1188 } 1189 1190 for (i = 0 ; i < QMAN_STREAMS; i++) { 1191 prop->next_sob_group_val[i] = 1; 1192 prop->curr_sob_group_idx[i] = 0; 1193 gaudi_collective_map_sobs(hdev, i); 1194 } 1195 1196 gaudi_collective_mstr_sob_mask_set(gaudi); 1197 1198 return 0; 1199 } 1200 1201 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group) 1202 { 1203 struct gaudi_device *gaudi = hdev->asic_specific; 1204 struct gaudi_collective_properties *cprop = &gaudi->collective_props; 1205 1206 kref_put(&cprop->hw_sob_group[sob_group].kref, 1207 gaudi_sob_group_hw_reset); 1208 } 1209 1210 static void gaudi_collective_master_init_job(struct hl_device *hdev, 1211 struct hl_cs_job *job, u32 stream, u32 sob_group_offset) 1212 { 1213 u32 master_sob_base, master_monitor, queue_id, cb_size = 0; 1214 struct gaudi_collective_properties *cprop; 1215 struct hl_gen_wait_properties wait_prop; 1216 struct hl_sync_stream_properties *prop; 1217 struct gaudi_device *gaudi; 1218 1219 gaudi = hdev->asic_specific; 1220 cprop = &gaudi->collective_props; 1221 queue_id = job->hw_queue_id; 1222 prop = 
&hdev->kernel_queues[queue_id].sync_stream_prop; 1223 1224 master_sob_base = 1225 cprop->hw_sob_group[sob_group_offset].base_sob_id; 1226 master_monitor = prop->collective_mstr_mon_id[0]; 1227 1228 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id; 1229 1230 dev_dbg(hdev->dev, 1231 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n", 1232 master_sob_base, cprop->mstr_sob_mask[0], 1233 cprop->next_sob_group_val[stream], 1234 master_monitor, queue_id); 1235 1236 wait_prop.data = (void *) job->patched_cb; 1237 wait_prop.sob_base = master_sob_base; 1238 wait_prop.sob_mask = cprop->mstr_sob_mask[0]; 1239 wait_prop.sob_val = cprop->next_sob_group_val[stream]; 1240 wait_prop.mon_id = master_monitor; 1241 wait_prop.q_idx = queue_id; 1242 wait_prop.size = cb_size; 1243 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1244 1245 master_sob_base += HL_MAX_SOBS_PER_MONITOR; 1246 master_monitor = prop->collective_mstr_mon_id[1]; 1247 1248 dev_dbg(hdev->dev, 1249 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n", 1250 master_sob_base, cprop->mstr_sob_mask[1], 1251 cprop->next_sob_group_val[stream], 1252 master_monitor, queue_id); 1253 1254 wait_prop.sob_base = master_sob_base; 1255 wait_prop.sob_mask = cprop->mstr_sob_mask[1]; 1256 wait_prop.mon_id = master_monitor; 1257 wait_prop.size = cb_size; 1258 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1259 } 1260 1261 static void gaudi_collective_slave_init_job(struct hl_device *hdev, 1262 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl) 1263 { 1264 struct hl_gen_wait_properties wait_prop; 1265 struct hl_sync_stream_properties *prop; 1266 u32 queue_id, cb_size = 0; 1267 1268 queue_id = job->hw_queue_id; 1269 prop = &hdev->kernel_queues[queue_id].sync_stream_prop; 1270 1271 if (job->cs->encaps_signals) { 1272 /* use the encaps signal handle store earlier in the flow 1273 * and set the SOB information from the encaps 1274 * signals handle 1275 */ 1276 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job, 1277 cs_cmpl); 1278 1279 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n", 1280 job->cs->sequence, 1281 cs_cmpl->hw_sob->sob_id, 1282 cs_cmpl->sob_val); 1283 } 1284 1285 /* Add to wait CBs using slave monitor */ 1286 wait_prop.data = (void *) job->user_cb; 1287 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id; 1288 wait_prop.sob_mask = 0x1; 1289 wait_prop.sob_val = cs_cmpl->sob_val; 1290 wait_prop.mon_id = prop->collective_slave_mon_id; 1291 wait_prop.q_idx = queue_id; 1292 wait_prop.size = cb_size; 1293 1294 dev_dbg(hdev->dev, 1295 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n", 1296 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, 1297 prop->collective_slave_mon_id, queue_id); 1298 1299 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1300 1301 dev_dbg(hdev->dev, 1302 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n", 1303 prop->collective_sob_id, queue_id); 1304 1305 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb, 1306 prop->collective_sob_id, cb_size, false); 1307 } 1308 1309 static int gaudi_collective_wait_init_cs(struct hl_cs *cs) 1310 { 1311 struct hl_cs_compl *signal_cs_cmpl = 1312 container_of(cs->signal_fence, struct hl_cs_compl, base_fence); 1313 struct hl_cs_compl *cs_cmpl = 1314 container_of(cs->fence, struct hl_cs_compl, base_fence); 1315 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; 1316 struct gaudi_collective_properties *cprop; 1317 u32 stream, queue_id, sob_group_offset; 1318 struct gaudi_device *gaudi; 1319 struct 
hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* At this checkpoint we only need the hw_sob pointer for the
		 * completion check before going over the jobs of the
		 * master/slaves. The sob_value will be taken later on in
		 * gaudi_collective_slave_init_job, depending on each job's
		 * wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* Check again whether the signal CS has already completed.
	 * If so, don't send any wait CS since the hw_sob could already be
	 * in reset. If the signal has not completed, take a refcount on the
	 * hw_sob to prevent resetting it while the wait CS is not yet
	 * submitted.
	 * Note that this check is protected by two locks: the hw queue lock
	 * and the completion object lock; the same completion object lock
	 * also protects the hw_sob reset handler function.
	 * The hw_queue lock prevents the hw_sob refcount, which is changed by
	 * the signal/wait flows, from going out of sync.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
						gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 cacheline_end, additional_commands;

	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
	additional_commands = sizeof(struct packet_msg_prot) * 2;

	if (user_cb_size + additional_commands > cacheline_end)
		return cacheline_end - user_cb_size + additional_commands;
	else
		return additional_commands;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* Since it's guaranteed that the collective wait CS has only one
	 * chunk, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need for parsing, the user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons: we don't need the CB in
	 * the CB idr anymore, and we need to decrement its refcount, which
	 * was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* The first job goes to the collective master queue; it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * the first monitor for NICs 0-7, the second monitor for NICs 8-9 and
	 * the reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues, which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1574 */ 1575 for (i = 0 ; i < num_jobs ; i++) { 1576 if (i == 0) { 1577 queue_id = wait_queue_id; 1578 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1579 HL_COLLECTIVE_MASTER, queue_id, 1580 wait_queue_id, encaps_signal_offset); 1581 } else { 1582 if (nic_idx < NIC_NUMBER_OF_ENGINES) { 1583 if (gaudi->hw_cap_initialized & 1584 BIT(HW_CAP_NIC_SHIFT + nic_idx)) 1585 skip = false; 1586 else 1587 skip = true; 1588 1589 queue_id = nic_queue; 1590 nic_queue += 4; 1591 nic_idx++; 1592 1593 if (skip) 1594 continue; 1595 } else { 1596 queue_id = collective_queue; 1597 } 1598 1599 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1600 HL_COLLECTIVE_SLAVE, queue_id, 1601 wait_queue_id, encaps_signal_offset); 1602 } 1603 1604 if (rc) 1605 return rc; 1606 } 1607 1608 return rc; 1609 } 1610 1611 static int gaudi_late_init(struct hl_device *hdev) 1612 { 1613 struct gaudi_device *gaudi = hdev->asic_specific; 1614 int rc; 1615 1616 rc = gaudi->cpucp_info_get(hdev); 1617 if (rc) { 1618 dev_err(hdev->dev, "Failed to get cpucp info\n"); 1619 return rc; 1620 } 1621 1622 if ((hdev->card_type == cpucp_card_type_pci) && 1623 (hdev->nic_ports_mask & 0x3)) { 1624 dev_info(hdev->dev, 1625 "PCI card detected, only 8 ports are enabled\n"); 1626 hdev->nic_ports_mask &= ~0x3; 1627 1628 /* Stop and disable unused NIC QMANs */ 1629 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1630 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1631 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1632 1633 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1634 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1635 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1636 1637 WREG32(mmNIC0_QM0_GLBL_CFG0, 0); 1638 WREG32(mmNIC0_QM1_GLBL_CFG0, 0); 1639 1640 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1); 1641 } 1642 1643 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0); 1644 if (rc) { 1645 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 1646 return rc; 1647 } 1648 1649 /* Scrub both SRAM and DRAM */ 1650 rc = hdev->asic_funcs->scrub_device_mem(hdev); 1651 if (rc) 1652 goto disable_pci_access; 1653 1654 rc = gaudi_fetch_psoc_frequency(hdev); 1655 if (rc) { 1656 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 1657 goto disable_pci_access; 1658 } 1659 1660 rc = gaudi_mmu_clear_pgt_range(hdev); 1661 if (rc) { 1662 dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); 1663 goto disable_pci_access; 1664 } 1665 1666 rc = gaudi_init_tpc_mem(hdev); 1667 if (rc) { 1668 dev_err(hdev->dev, "Failed to initialize TPC memories\n"); 1669 goto disable_pci_access; 1670 } 1671 1672 rc = gaudi_collective_init(hdev); 1673 if (rc) { 1674 dev_err(hdev->dev, "Failed to init collective\n"); 1675 goto disable_pci_access; 1676 } 1677 1678 /* We only support a single ASID for the user, so for the sake of optimization, just 1679 * initialize the ASID one time during device initialization with the fixed value of 1 1680 */ 1681 gaudi_mmu_prepare(hdev, 1); 1682 1683 hl_fw_set_pll_profile(hdev); 1684 1685 return 0; 1686 1687 disable_pci_access: 1688 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 1689 1690 return rc; 1691 } 1692 1693 static void gaudi_late_fini(struct hl_device *hdev) 1694 { 1695 hl_hwmon_release_resources(hdev); 1696 } 1697 1698 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 1699 { 1700 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 1701 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}; 1702 int i, j, rc = 0; 1703 1704 /* 1705 * 
	 * The device CPU works with 40-bit addresses, and bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the entire allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i],
								GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory is not identical across the range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
	}
}

static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ...
GAUDI_QUEUE_ID_NIC_9_3: 1788 q->pq_size = NIC_QMAN_SIZE_IN_BYTES; 1789 break; 1790 default: 1791 dev_err(hdev->dev, "Bad internal queue index %d", i); 1792 rc = -EINVAL; 1793 goto free_internal_qmans_pq_mem; 1794 } 1795 1796 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr, 1797 GFP_KERNEL | __GFP_ZERO); 1798 if (!q->pq_kernel_addr) { 1799 rc = -ENOMEM; 1800 goto free_internal_qmans_pq_mem; 1801 } 1802 } 1803 1804 return 0; 1805 1806 free_internal_qmans_pq_mem: 1807 gaudi_free_internal_qmans_pq_mem(hdev); 1808 return rc; 1809 } 1810 1811 static void gaudi_set_pci_memory_regions(struct hl_device *hdev) 1812 { 1813 struct asic_fixed_properties *prop = &hdev->asic_prop; 1814 struct pci_mem_region *region; 1815 1816 /* CFG */ 1817 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 1818 region->region_base = CFG_BASE; 1819 region->region_size = CFG_SIZE; 1820 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR; 1821 region->bar_size = CFG_BAR_SIZE; 1822 region->bar_id = CFG_BAR_ID; 1823 region->used = 1; 1824 1825 /* SRAM */ 1826 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 1827 region->region_base = SRAM_BASE_ADDR; 1828 region->region_size = SRAM_SIZE; 1829 region->offset_in_bar = 0; 1830 region->bar_size = SRAM_BAR_SIZE; 1831 region->bar_id = SRAM_BAR_ID; 1832 region->used = 1; 1833 1834 /* DRAM */ 1835 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 1836 region->region_base = DRAM_PHYS_BASE; 1837 region->region_size = hdev->asic_prop.dram_size; 1838 region->offset_in_bar = 0; 1839 region->bar_size = prop->dram_pci_bar_size; 1840 region->bar_id = HBM_BAR_ID; 1841 region->used = 1; 1842 1843 /* SP SRAM */ 1844 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM]; 1845 region->region_base = PSOC_SCRATCHPAD_ADDR; 1846 region->region_size = PSOC_SCRATCHPAD_SIZE; 1847 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR; 1848 region->bar_size = CFG_BAR_SIZE; 1849 region->bar_id = CFG_BAR_ID; 1850 region->used = 1; 1851 } 1852 1853 static int gaudi_sw_init(struct hl_device *hdev) 1854 { 1855 struct gaudi_device *gaudi; 1856 u32 i, event_id = 0; 1857 int rc; 1858 1859 /* Allocate device structure */ 1860 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 1861 if (!gaudi) 1862 return -ENOMEM; 1863 1864 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 1865 if (gaudi_irq_map_table[i].valid) { 1866 if (event_id == GAUDI_EVENT_SIZE) { 1867 dev_err(hdev->dev, 1868 "Event array exceeds the limit of %u events\n", 1869 GAUDI_EVENT_SIZE); 1870 rc = -EINVAL; 1871 goto free_gaudi_device; 1872 } 1873 1874 gaudi->events[event_id++] = 1875 gaudi_irq_map_table[i].fc_id; 1876 } 1877 } 1878 1879 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1880 1881 hdev->asic_specific = gaudi; 1882 1883 /* Create DMA pool for small allocations */ 1884 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1885 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1886 if (!hdev->dma_pool) { 1887 dev_err(hdev->dev, "failed to create DMA pool\n"); 1888 rc = -ENOMEM; 1889 goto free_gaudi_device; 1890 } 1891 1892 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1893 if (rc) 1894 goto free_dma_pool; 1895 1896 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1897 if (!hdev->cpu_accessible_dma_pool) { 1898 dev_err(hdev->dev, 1899 "Failed to create CPU accessible DMA pool\n"); 1900 rc = -ENOMEM; 1901 goto free_cpu_dma_mem; 1902 } 1903 1904 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1905 (uintptr_t) hdev->cpu_accessible_dma_mem, 1906 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1907 if 
(rc) { 1908 dev_err(hdev->dev, 1909 "Failed to add memory to CPU accessible DMA pool\n"); 1910 rc = -EFAULT; 1911 goto free_cpu_accessible_dma_pool; 1912 } 1913 1914 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1915 if (rc) 1916 goto free_cpu_accessible_dma_pool; 1917 1918 spin_lock_init(&gaudi->hw_queues_lock); 1919 1920 hdev->supports_sync_stream = true; 1921 hdev->supports_coresight = true; 1922 hdev->supports_staged_submission = true; 1923 hdev->supports_wait_for_multi_cs = true; 1924 1925 hdev->asic_funcs->set_pci_memory_regions(hdev); 1926 hdev->stream_master_qid_arr = 1927 hdev->asic_funcs->get_stream_master_qid_arr(); 1928 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; 1929 1930 return 0; 1931 1932 free_cpu_accessible_dma_pool: 1933 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1934 free_cpu_dma_mem: 1935 if (!hdev->asic_prop.fw_security_enabled) 1936 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1937 hdev->cpu_pci_msb_addr); 1938 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1939 hdev->cpu_accessible_dma_address); 1940 free_dma_pool: 1941 dma_pool_destroy(hdev->dma_pool); 1942 free_gaudi_device: 1943 kfree(gaudi); 1944 return rc; 1945 } 1946 1947 static int gaudi_sw_fini(struct hl_device *hdev) 1948 { 1949 struct gaudi_device *gaudi = hdev->asic_specific; 1950 1951 gaudi_free_internal_qmans_pq_mem(hdev); 1952 1953 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1954 1955 if (!hdev->asic_prop.fw_security_enabled) 1956 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1957 hdev->cpu_pci_msb_addr); 1958 1959 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1960 hdev->cpu_accessible_dma_address); 1961 1962 dma_pool_destroy(hdev->dma_pool); 1963 1964 kfree(gaudi); 1965 1966 return 0; 1967 } 1968 1969 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1970 { 1971 struct hl_device *hdev = arg; 1972 int i; 1973 1974 if (hdev->disabled) 1975 return IRQ_HANDLED; 1976 1977 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1978 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1979 1980 hl_irq_handler_eq(irq, &hdev->event_queue); 1981 1982 return IRQ_HANDLED; 1983 } 1984 1985 /* 1986 * For backward compatibility, new MSI interrupts should be set after the 1987 * existing CPU and NIC interrupts. 1988 */ 1989 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1990 bool cpu_eq) 1991 { 1992 int msi_vec; 1993 1994 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1995 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1996 GAUDI_EVENT_QUEUE_MSI_IDX); 1997 1998 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? 
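		/* CQ indices below GAUDI_EVENT_QUEUE_MSI_IDX and the CPU EQ
		 * keep their original vector numbers; any other index is
		 * pushed past the NIC vectors and the CPU EQ, hence the
		 * "+ NIC_NUMBER_OF_ENGINES + 1" in the second branch.
		 */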
nr : 1999 (nr + NIC_NUMBER_OF_ENGINES + 1); 2000 2001 return pci_irq_vector(hdev->pdev, msi_vec); 2002 } 2003 2004 static int gaudi_enable_msi_single(struct hl_device *hdev) 2005 { 2006 int rc, irq; 2007 2008 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n"); 2009 2010 irq = gaudi_pci_irq_vector(hdev, 0, false); 2011 rc = request_irq(irq, gaudi_irq_handler_single, 0, 2012 "gaudi single msi", hdev); 2013 if (rc) 2014 dev_err(hdev->dev, 2015 "Failed to request single MSI IRQ\n"); 2016 2017 return rc; 2018 } 2019 2020 static int gaudi_enable_msi_multi(struct hl_device *hdev) 2021 { 2022 int cq_cnt = hdev->asic_prop.completion_queues_count; 2023 int rc, i, irq_cnt_init, irq; 2024 2025 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) { 2026 irq = gaudi_pci_irq_vector(hdev, i, false); 2027 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i], 2028 &hdev->completion_queue[i]); 2029 if (rc) { 2030 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 2031 goto free_irqs; 2032 } 2033 } 2034 2035 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true); 2036 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt], 2037 &hdev->event_queue); 2038 if (rc) { 2039 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 2040 goto free_irqs; 2041 } 2042 2043 return 0; 2044 2045 free_irqs: 2046 for (i = 0 ; i < irq_cnt_init ; i++) 2047 free_irq(gaudi_pci_irq_vector(hdev, i, false), 2048 &hdev->completion_queue[i]); 2049 return rc; 2050 } 2051 2052 static int gaudi_enable_msi(struct hl_device *hdev) 2053 { 2054 struct gaudi_device *gaudi = hdev->asic_specific; 2055 int rc; 2056 2057 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 2058 return 0; 2059 2060 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); 2061 if (rc < 0) { 2062 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 2063 return rc; 2064 } 2065 2066 if (rc < NUMBER_OF_INTERRUPTS) { 2067 gaudi->multi_msi_mode = false; 2068 rc = gaudi_enable_msi_single(hdev); 2069 } else { 2070 gaudi->multi_msi_mode = true; 2071 rc = gaudi_enable_msi_multi(hdev); 2072 } 2073 2074 if (rc) 2075 goto free_pci_irq_vectors; 2076 2077 gaudi->hw_cap_initialized |= HW_CAP_MSI; 2078 2079 return 0; 2080 2081 free_pci_irq_vectors: 2082 pci_free_irq_vectors(hdev->pdev); 2083 return rc; 2084 } 2085 2086 static void gaudi_sync_irqs(struct hl_device *hdev) 2087 { 2088 struct gaudi_device *gaudi = hdev->asic_specific; 2089 int i, cq_cnt = hdev->asic_prop.completion_queues_count; 2090 2091 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2092 return; 2093 2094 /* Wait for all pending IRQs to be finished */ 2095 if (gaudi->multi_msi_mode) { 2096 for (i = 0 ; i < cq_cnt ; i++) 2097 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false)); 2098 2099 synchronize_irq(gaudi_pci_irq_vector(hdev, 2100 GAUDI_EVENT_QUEUE_MSI_IDX, 2101 true)); 2102 } else { 2103 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 2104 } 2105 } 2106 2107 static void gaudi_disable_msi(struct hl_device *hdev) 2108 { 2109 struct gaudi_device *gaudi = hdev->asic_specific; 2110 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count; 2111 2112 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2113 return; 2114 2115 gaudi_sync_irqs(hdev); 2116 2117 if (gaudi->multi_msi_mode) { 2118 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, 2119 true); 2120 free_irq(irq, &hdev->event_queue); 2121 2122 for (i = 0 ; i < cq_cnt ; i++) { 2123 irq = gaudi_pci_irq_vector(hdev, i, false); 2124 free_irq(irq, &hdev->completion_queue[i]); 2125 } 2126 } else { 2127 
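		/* Single MSI mode: the lone vector was requested in
		 * gaudi_enable_msi_single() with hdev as the cookie, so free
		 * it with the same argument.
		 */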
free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 2128 } 2129 2130 pci_free_irq_vectors(hdev->pdev); 2131 2132 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 2133 } 2134 2135 static void gaudi_init_scrambler_sram(struct hl_device *hdev) 2136 { 2137 struct gaudi_device *gaudi = hdev->asic_specific; 2138 2139 if (hdev->asic_prop.fw_security_enabled) 2140 return; 2141 2142 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 2143 CPU_BOOT_DEV_STS0_SRAM_SCR_EN) 2144 return; 2145 2146 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 2147 return; 2148 2149 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2150 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2151 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2152 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2153 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2154 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2155 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2156 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2157 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2158 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2159 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2160 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2161 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2162 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2163 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2164 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2165 2166 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2167 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2168 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2169 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2170 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2171 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2172 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2173 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2174 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2175 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2176 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2177 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2178 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2179 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2180 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2181 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2182 2183 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 2184 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2185 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 2186 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2187 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 2188 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2189 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 2190 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2191 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 2192 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2193 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 2194 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2195 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 2196 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2197 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 2198 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2199 2200 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 2201 } 2202 2203 static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 2204 { 2205 struct gaudi_device *gaudi = hdev->asic_specific; 2206 2207 if (hdev->asic_prop.fw_security_enabled) 2208 return; 2209 2210 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2211 CPU_BOOT_DEV_STS0_DRAM_SCR_EN) 2212 return; 2213 2214 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 2215 return; 2216 2217 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 2218 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2219 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 2220 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2221 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 2222 1 << 
IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2223 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 2224 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2225 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 2226 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2227 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 2228 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2229 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 2230 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2231 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 2232 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2233 2234 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 2235 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2236 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 2237 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2238 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 2239 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2240 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 2241 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2242 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 2243 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2244 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 2245 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2246 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 2247 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2248 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 2249 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2250 2251 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 2252 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2253 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 2254 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2255 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 2256 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2257 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 2258 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2259 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 2260 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2261 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 2262 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2263 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 2264 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2265 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 2266 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2267 2268 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 2269 } 2270 2271 static void gaudi_init_e2e(struct hl_device *hdev) 2272 { 2273 if (hdev->asic_prop.fw_security_enabled) 2274 return; 2275 2276 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2277 CPU_BOOT_DEV_STS0_E2E_CRED_EN) 2278 return; 2279 2280 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 2281 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 2282 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 2283 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 2284 2285 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2286 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2287 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2288 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2289 2290 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2291 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2292 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2293 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2294 2295 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2296 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2297 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2298 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2299 2300 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2301 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2302 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2303 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2304 2305 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2306 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2307 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 
1); 2308 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2309 2310 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2311 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2312 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2313 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2314 2315 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 2316 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 2317 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 2318 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 2319 2320 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 2321 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 2322 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 2323 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 2324 2325 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2326 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2327 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2328 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2329 2330 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2331 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2332 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2333 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2334 2335 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2336 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2337 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2338 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2339 2340 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2341 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2342 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2343 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2344 2345 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2346 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2347 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2348 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2349 2350 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2351 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2352 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2353 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2354 2355 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 2356 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 2357 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 2358 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 2359 2360 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2361 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2362 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2363 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2364 2365 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2366 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2367 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2368 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2369 2370 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2371 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2372 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2373 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2374 2375 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2376 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2377 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2378 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2379 2380 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2381 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2382 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2383 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2384 2385 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2386 
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2387 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2388 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2389 2390 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2391 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2392 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2393 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2394 2395 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2396 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2397 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2398 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2399 2400 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 2401 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2402 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 2403 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2404 2405 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 2406 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2407 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 2408 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2409 2410 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 2411 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2412 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 2413 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2414 2415 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 2416 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2417 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 2418 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2419 2420 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 2421 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2422 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 2423 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2424 2425 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 2426 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2427 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 2428 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2429 2430 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 2431 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2432 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 2433 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2434 2435 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 2436 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2437 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 2438 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2439 2440 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 2441 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2442 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 2443 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2444 2445 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 2446 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2447 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 2448 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2449 2450 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 2451 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2452 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 2453 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2454 2455 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 2456 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2457 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 2458 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2459 2460 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 2461 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2462 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 2463 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2464 2465 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 2466 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2467 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 2468 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2469 2470 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 2471 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2472 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 2473 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2474 2475 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 2476 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2477 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 2478 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2479 2480 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 2481 1 << 
DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2482 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 2483 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2484 2485 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 2486 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2487 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 2488 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2489 2490 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 2491 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2492 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 2493 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2494 2495 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 2496 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2497 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 2498 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2499 2500 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 2501 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2502 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 2503 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2504 2505 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 2506 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2507 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 2508 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2509 2510 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, 2511 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2512 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, 2513 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2514 2515 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, 2516 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2517 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, 2518 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2519 } 2520 2521 static void gaudi_init_hbm_cred(struct hl_device *hdev) 2522 { 2523 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; 2524 2525 if (hdev->asic_prop.fw_security_enabled) 2526 return; 2527 2528 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2529 CPU_BOOT_DEV_STS0_HBM_CRED_EN) 2530 return; 2531 2532 hbm0_wr = 0x33333333; 2533 hbm0_rd = 0x77777777; 2534 hbm1_wr = 0x55555555; 2535 hbm1_rd = 0xDDDDDDDD; 2536 2537 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); 2538 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); 2539 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); 2540 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); 2541 2542 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); 2543 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); 2544 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); 2545 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); 2546 2547 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); 2548 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); 2549 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); 2550 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); 2551 2552 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); 2553 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); 2554 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); 2555 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); 2556 2557 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, 2558 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2559 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2560 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, 2561 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2562 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2563 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, 2564 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2565 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2566 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, 2567 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2568 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2569 2570 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, 2571 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2572 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2573 
WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, 2574 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2575 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2576 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, 2577 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2578 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2579 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, 2580 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2581 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2582 } 2583 2584 static void gaudi_init_golden_registers(struct hl_device *hdev) 2585 { 2586 u32 tpc_offset; 2587 int tpc_id, i; 2588 2589 gaudi_init_e2e(hdev); 2590 gaudi_init_hbm_cred(hdev); 2591 2592 for (tpc_id = 0, tpc_offset = 0; 2593 tpc_id < TPC_NUMBER_OF_ENGINES; 2594 tpc_id++, tpc_offset += TPC_CFG_OFFSET) { 2595 /* Mask all arithmetic interrupts from TPC */ 2596 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); 2597 /* Set 16 cache lines */ 2598 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, 2599 ICACHE_FETCH_LINE_NUM, 2); 2600 } 2601 2602 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ 2603 for (i = 0 ; i < 128 ; i += 8) 2604 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); 2605 2606 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2607 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2608 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2609 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2610 } 2611 2612 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 2613 int qman_id, dma_addr_t qman_pq_addr) 2614 { 2615 struct cpu_dyn_regs *dyn_regs = 2616 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2617 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2618 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2619 u32 q_off, dma_qm_offset; 2620 u32 dma_qm_err_cfg, irq_handler_offset; 2621 2622 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2623 2624 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2625 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2626 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2627 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2628 so_base_en_lo = lower_32_bits(CFG_BASE + 2629 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2630 so_base_en_hi = upper_32_bits(CFG_BASE + 2631 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2632 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2633 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2634 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2635 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2636 so_base_ws_lo = lower_32_bits(CFG_BASE + 2637 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2638 so_base_ws_hi = upper_32_bits(CFG_BASE + 2639 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2640 2641 q_off = dma_qm_offset + qman_id * 4; 2642 2643 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 2644 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 2645 2646 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 2647 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2648 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2649 2650 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 2651 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2652 QMAN_LDMA_SRC_OFFSET); 2653 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2654 QMAN_LDMA_DST_OFFSET); 2655 2656 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2657 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2658 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2659 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2660 
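	/* MSG_BASE0/1 above point the CP at the east-north sync manager
	 * (monitor payload address and first SOB); MSG_BASE2/3 below point
	 * at the west-south sync manager, which is the one used by the
	 * sync-stream collective flow (see the DMA5/TPC7/NIC QMAN init).
	 */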
WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 2661 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 2662 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 2663 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 2664 2665 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 2666 2667 /* The following configuration is needed only once per QMAN */ 2668 if (qman_id == 0) { 2669 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2670 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2671 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2672 2673 /* Configure RAZWI IRQ */ 2674 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2675 if (hdev->stop_on_err) 2676 dma_qm_err_cfg |= 2677 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2678 2679 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2680 2681 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2682 lower_32_bits(CFG_BASE + irq_handler_offset)); 2683 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2684 upper_32_bits(CFG_BASE + irq_handler_offset)); 2685 2686 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2687 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2688 dma_id); 2689 2690 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2691 QM_ARB_ERR_MSG_EN_MASK); 2692 2693 /* Set timeout to maximum */ 2694 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2695 2696 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2697 QMAN_EXTERNAL_MAKE_TRUSTED); 2698 2699 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2700 } 2701 } 2702 2703 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) 2704 { 2705 struct cpu_dyn_regs *dyn_regs = 2706 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2707 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; 2708 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 2709 u32 irq_handler_offset; 2710 2711 /* Set to maximum possible according to physical size */ 2712 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); 2713 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); 2714 2715 /* WA for H/W bug H3-2116 */ 2716 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15); 2717 2718 /* STOP_ON bit implies no completion to operation in case of RAZWI */ 2719 if (hdev->stop_on_err) 2720 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; 2721 2722 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); 2723 2724 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2725 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2726 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 2727 2728 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, 2729 lower_32_bits(CFG_BASE + irq_handler_offset)); 2730 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, 2731 upper_32_bits(CFG_BASE + irq_handler_offset)); 2732 2733 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, 2734 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); 2735 WREG32(mmDMA0_CORE_PROT + dma_offset, 2736 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); 2737 /* If the channel is secured, it should be in MMU bypass mode */ 2738 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, 2739 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); 2740 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); 2741 } 2742 2743 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, 2744 u32 enable_mask) 2745 { 2746 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2747 2748 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); 2749 } 2750 2751 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) 2752 { 2753 struct gaudi_device *gaudi = hdev->asic_specific; 2754 struct hl_hw_queue *q; 2755 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; 2756 2757 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) 2758 return; 2759 2760 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2761 dma_id = gaudi_dma_assignment[i]; 2762 /* 2763 * For queues after the CPU Q need to add 1 to get the correct 2764 * queue. In addition, need to add the CPU EQ and NIC IRQs in 2765 * order to get the correct MSI register. 2766 */ 2767 if (dma_id > 1) { 2768 cpu_skip = 1; 2769 nic_skip = NIC_NUMBER_OF_ENGINES; 2770 } else { 2771 cpu_skip = 0; 2772 nic_skip = 0; 2773 } 2774 2775 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2776 q_idx = 4 * dma_id + j + cpu_skip; 2777 q = &hdev->kernel_queues[q_idx]; 2778 q->cq_id = cq_id++; 2779 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 2780 gaudi_init_pci_dma_qman(hdev, dma_id, j, 2781 q->bus_address); 2782 } 2783 2784 gaudi_init_dma_core(hdev, dma_id); 2785 2786 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 2787 } 2788 2789 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2790 } 2791 2792 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2793 int qman_id, u64 qman_base_addr) 2794 { 2795 struct cpu_dyn_regs *dyn_regs = 2796 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2797 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2798 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2799 u32 dma_qm_err_cfg, irq_handler_offset; 2800 u32 q_off, dma_qm_offset; 2801 2802 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2803 2804 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2805 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2806 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2807 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2808 so_base_en_lo = lower_32_bits(CFG_BASE + 2809 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2810 so_base_en_hi = upper_32_bits(CFG_BASE + 2811 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2812 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2813 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2814 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2815 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2816 so_base_ws_lo = lower_32_bits(CFG_BASE + 2817 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2818 so_base_ws_hi = upper_32_bits(CFG_BASE + 2819 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2820 2821 q_off = dma_qm_offset + qman_id * 4; 2822 2823 if (qman_id < 4) { 2824 
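		/* qman_id 0-3 are the four upper (stream) CPs, each with its
		 * own PQ; qman_id 4 is the lower CP, which has no PQ and
		 * instead gets the error reporting and arbitration
		 * configuration in the else branch below.
		 */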
WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, 2825 lower_32_bits(qman_base_addr)); 2826 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, 2827 upper_32_bits(qman_base_addr)); 2828 2829 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); 2830 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2831 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2832 2833 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2834 QMAN_CPDMA_SIZE_OFFSET); 2835 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2836 QMAN_CPDMA_SRC_OFFSET); 2837 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2838 QMAN_CPDMA_DST_OFFSET); 2839 } else { 2840 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2841 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2842 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2843 2844 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2845 QMAN_LDMA_SIZE_OFFSET); 2846 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2847 QMAN_LDMA_SRC_OFFSET); 2848 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2849 QMAN_LDMA_DST_OFFSET); 2850 2851 /* Configure RAZWI IRQ */ 2852 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2853 if (hdev->stop_on_err) 2854 dma_qm_err_cfg |= 2855 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2856 2857 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2858 2859 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2860 lower_32_bits(CFG_BASE + irq_handler_offset)); 2861 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2862 upper_32_bits(CFG_BASE + irq_handler_offset)); 2863 2864 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2865 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2866 dma_id); 2867 2868 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2869 QM_ARB_ERR_MSG_EN_MASK); 2870 2871 /* Set timeout to maximum */ 2872 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2873 2874 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2875 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2876 QMAN_INTERNAL_MAKE_TRUSTED); 2877 } 2878 2879 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2880 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2881 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2882 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2883 2884 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */ 2885 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) { 2886 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 2887 mtr_base_ws_lo); 2888 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 2889 mtr_base_ws_hi); 2890 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 2891 so_base_ws_lo); 2892 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 2893 so_base_ws_hi); 2894 } 2895 } 2896 2897 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) 2898 { 2899 struct gaudi_device *gaudi = hdev->asic_specific; 2900 struct gaudi_internal_qman_info *q; 2901 u64 qman_base_addr; 2902 int i, j, dma_id, internal_q_index; 2903 2904 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) 2905 return; 2906 2907 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 2908 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; 2909 2910 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2911 /* 2912 * Add the CPU queue in order to get the correct queue 2913 * number as all internal queue are placed after it 2914 */ 2915 internal_q_index = dma_id * QMAN_STREAMS + j + 1; 2916 2917 q = &gaudi->internal_qmans[internal_q_index]; 2918 qman_base_addr = (u64) q->pq_dma_addr; 2919 gaudi_init_hbm_dma_qman(hdev, dma_id, j, 
2920 qman_base_addr); 2921 } 2922 2923 /* Initializing lower CP for HBM DMA QMAN */ 2924 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); 2925 2926 gaudi_init_dma_core(hdev, dma_id); 2927 2928 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); 2929 } 2930 2931 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; 2932 } 2933 2934 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, 2935 int qman_id, u64 qman_base_addr) 2936 { 2937 struct cpu_dyn_regs *dyn_regs = 2938 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2939 u32 mtr_base_lo, mtr_base_hi; 2940 u32 so_base_lo, so_base_hi; 2941 u32 irq_handler_offset; 2942 u32 q_off, mme_id; 2943 u32 mme_qm_err_cfg; 2944 2945 mtr_base_lo = lower_32_bits(CFG_BASE + 2946 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2947 mtr_base_hi = upper_32_bits(CFG_BASE + 2948 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2949 so_base_lo = lower_32_bits(CFG_BASE + 2950 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2951 so_base_hi = upper_32_bits(CFG_BASE + 2952 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2953 2954 q_off = mme_offset + qman_id * 4; 2955 2956 if (qman_id < 4) { 2957 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, 2958 lower_32_bits(qman_base_addr)); 2959 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, 2960 upper_32_bits(qman_base_addr)); 2961 2962 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); 2963 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); 2964 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); 2965 2966 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2967 QMAN_CPDMA_SIZE_OFFSET); 2968 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2969 QMAN_CPDMA_SRC_OFFSET); 2970 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2971 QMAN_CPDMA_DST_OFFSET); 2972 } else { 2973 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2974 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2975 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 2976 2977 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2978 QMAN_LDMA_SIZE_OFFSET); 2979 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2980 QMAN_LDMA_SRC_OFFSET); 2981 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2982 QMAN_LDMA_DST_OFFSET); 2983 2984 /* Configure RAZWI IRQ */ 2985 mme_id = mme_offset / 2986 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2; 2987 2988 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2989 if (hdev->stop_on_err) 2990 mme_qm_err_cfg |= 2991 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2992 2993 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); 2994 2995 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, 2996 lower_32_bits(CFG_BASE + irq_handler_offset)); 2997 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, 2998 upper_32_bits(CFG_BASE + irq_handler_offset)); 2999 3000 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, 3001 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + 3002 mme_id); 3003 3004 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, 3005 QM_ARB_ERR_MSG_EN_MASK); 3006 3007 /* Set timeout to maximum */ 3008 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); 3009 3010 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); 3011 WREG32(mmMME0_QM_GLBL_PROT + mme_offset, 3012 QMAN_INTERNAL_MAKE_TRUSTED); 3013 } 3014 3015 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 3016 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 3017 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 3018 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 3019 } 3020 3021 static void gaudi_init_mme_qmans(struct hl_device *hdev) 3022 { 3023 struct gaudi_device *gaudi = hdev->asic_specific; 3024 struct gaudi_internal_qman_info *q; 3025 u64 qman_base_addr; 3026 u32 mme_offset; 3027 int i, internal_q_index; 3028 3029 if (gaudi->hw_cap_initialized & HW_CAP_MME) 3030 return; 3031 3032 /* 3033 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) 3034 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) 3035 */ 3036 3037 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 3038 3039 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { 3040 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; 3041 q = &gaudi->internal_qmans[internal_q_index]; 3042 qman_base_addr = (u64) q->pq_dma_addr; 3043 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), 3044 qman_base_addr); 3045 if (i == 3) 3046 mme_offset = 0; 3047 } 3048 3049 /* Initializing lower CP for MME QMANs */ 3050 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 3051 gaudi_init_mme_qman(hdev, mme_offset, 4, 0); 3052 gaudi_init_mme_qman(hdev, 0, 4, 0); 3053 3054 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); 3055 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); 3056 3057 gaudi->hw_cap_initialized |= HW_CAP_MME; 3058 } 3059 3060 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, 3061 int qman_id, u64 qman_base_addr) 3062 { 3063 struct cpu_dyn_regs *dyn_regs = 3064 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3065 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3066 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3067 u32 tpc_qm_err_cfg, irq_handler_offset; 3068 u32 q_off, tpc_id; 3069 3070 mtr_base_en_lo = lower_32_bits(CFG_BASE + 3071 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3072 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3073 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3074 
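	/* Naming convention for the locals above and below: mtr_* hold the
	 * monitor payload address (MON_PAY_ADDRL_0) and so_* the first sync
	 * object (SOB_OBJ_0); _en comes from the east-north sync manager,
	 * _ws from the west-south one.
	 */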
so_base_en_lo = lower_32_bits(CFG_BASE + 3075 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3076 so_base_en_hi = upper_32_bits(CFG_BASE + 3077 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3078 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 3079 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3080 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3081 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3082 so_base_ws_lo = lower_32_bits(CFG_BASE + 3083 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3084 so_base_ws_hi = upper_32_bits(CFG_BASE + 3085 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3086 3087 q_off = tpc_offset + qman_id * 4; 3088 3089 tpc_id = tpc_offset / 3090 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); 3091 3092 if (qman_id < 4) { 3093 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, 3094 lower_32_bits(qman_base_addr)); 3095 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, 3096 upper_32_bits(qman_base_addr)); 3097 3098 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); 3099 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); 3100 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); 3101 3102 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3103 QMAN_CPDMA_SIZE_OFFSET); 3104 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3105 QMAN_CPDMA_SRC_OFFSET); 3106 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3107 QMAN_CPDMA_DST_OFFSET); 3108 } else { 3109 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 3110 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3111 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 3112 3113 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3114 QMAN_LDMA_SIZE_OFFSET); 3115 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3116 QMAN_LDMA_SRC_OFFSET); 3117 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3118 QMAN_LDMA_DST_OFFSET); 3119 3120 /* Configure RAZWI IRQ */ 3121 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3122 if (hdev->stop_on_err) 3123 tpc_qm_err_cfg |= 3124 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3125 3126 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); 3127 3128 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, 3129 lower_32_bits(CFG_BASE + irq_handler_offset)); 3130 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, 3131 upper_32_bits(CFG_BASE + irq_handler_offset)); 3132 3133 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, 3134 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + 3135 tpc_id); 3136 3137 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, 3138 QM_ARB_ERR_MSG_EN_MASK); 3139 3140 /* Set timeout to maximum */ 3141 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); 3142 3143 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); 3144 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, 3145 QMAN_INTERNAL_MAKE_TRUSTED); 3146 } 3147 3148 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3149 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3150 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3151 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3152 3153 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */ 3154 if (tpc_id == 6) { 3155 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 3156 mtr_base_ws_lo); 3157 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 3158 mtr_base_ws_hi); 3159 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 3160 so_base_ws_lo); 3161 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 3162 so_base_ws_hi); 3163 } 3164 } 3165 3166 static void gaudi_init_tpc_qmans(struct hl_device *hdev) 3167 { 3168 struct gaudi_device *gaudi = hdev->asic_specific; 3169 struct 
gaudi_internal_qman_info *q; 3170 u64 qman_base_addr; 3171 u32 so_base_hi, tpc_offset = 0; 3172 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - 3173 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; 3174 int i, tpc_id, internal_q_index; 3175 3176 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) 3177 return; 3178 3179 so_base_hi = upper_32_bits(CFG_BASE + 3180 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3181 3182 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3183 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3184 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + 3185 tpc_id * QMAN_STREAMS + i; 3186 q = &gaudi->internal_qmans[internal_q_index]; 3187 qman_base_addr = (u64) q->pq_dma_addr; 3188 gaudi_init_tpc_qman(hdev, tpc_offset, i, 3189 qman_base_addr); 3190 3191 if (i == 3) { 3192 /* Initializing lower CP for TPC QMAN */ 3193 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); 3194 3195 /* Enable the QMAN and TPC channel */ 3196 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 3197 QMAN_TPC_ENABLE); 3198 } 3199 } 3200 3201 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, 3202 so_base_hi); 3203 3204 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3205 3206 gaudi->hw_cap_initialized |= 3207 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id); 3208 } 3209 } 3210 3211 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, 3212 int qman_id, u64 qman_base_addr, int nic_id) 3213 { 3214 struct cpu_dyn_regs *dyn_regs = 3215 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3216 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3217 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3218 u32 nic_qm_err_cfg, irq_handler_offset; 3219 u32 q_off; 3220 3221 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3222 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3223 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3224 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3225 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3226 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3227 so_base_en_hi = upper_32_bits(CFG_BASE + 3228 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3229 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3230 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3231 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3232 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3233 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3234 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3235 so_base_ws_hi = upper_32_bits(CFG_BASE + 3236 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3237 3238 q_off = nic_offset + qman_id * 4; 3239 3240 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr)); 3241 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr)); 3242 3243 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH)); 3244 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0); 3245 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0); 3246 3247 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3248 QMAN_LDMA_SIZE_OFFSET); 3249 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3250 QMAN_LDMA_SRC_OFFSET); 3251 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3252 QMAN_LDMA_DST_OFFSET); 3253 3254 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3255 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3256 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3257 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3258 3259 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */ 3260 
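	/* Unlike the HBM DMA QMANs (DMA5 only) and the TPC QMANs (TPC7
	 * only), every NIC QMAN gets the west-south bases, since any
	 * enabled NIC can serve as a collective slave queue.
	 */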
WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 3261 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 3262 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 3263 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 3264 3265 if (qman_id == 0) { 3266 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 3267 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3268 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 3269 3270 /* Configure RAZWI IRQ */ 3271 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3272 if (hdev->stop_on_err) 3273 nic_qm_err_cfg |= 3274 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3275 3276 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg); 3277 3278 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset, 3279 lower_32_bits(CFG_BASE + irq_handler_offset)); 3280 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset, 3281 upper_32_bits(CFG_BASE + irq_handler_offset)); 3282 3283 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset, 3284 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id + 3285 nic_id); 3286 3287 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, 3288 QM_ARB_ERR_MSG_EN_MASK); 3289 3290 /* Set timeout to maximum */ 3291 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); 3292 3293 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); 3294 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, 3295 QMAN_INTERNAL_MAKE_TRUSTED); 3296 } 3297 } 3298 3299 static void gaudi_init_nic_qmans(struct hl_device *hdev) 3300 { 3301 struct gaudi_device *gaudi = hdev->asic_specific; 3302 struct gaudi_internal_qman_info *q; 3303 u64 qman_base_addr; 3304 u32 nic_offset = 0; 3305 u32 nic_delta_between_qmans = 3306 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3307 u32 nic_delta_between_nics = 3308 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3309 int i, nic_id, internal_q_index; 3310 3311 if (!hdev->nic_ports_mask) 3312 return; 3313 3314 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK) 3315 return; 3316 3317 dev_dbg(hdev->dev, "Initializing NIC QMANs\n"); 3318 3319 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3320 if (!(hdev->nic_ports_mask & (1 << nic_id))) { 3321 nic_offset += nic_delta_between_qmans; 3322 if (nic_id & 1) { 3323 nic_offset -= (nic_delta_between_qmans * 2); 3324 nic_offset += nic_delta_between_nics; 3325 } 3326 continue; 3327 } 3328 3329 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3330 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 + 3331 nic_id * QMAN_STREAMS + i; 3332 q = &gaudi->internal_qmans[internal_q_index]; 3333 qman_base_addr = (u64) q->pq_dma_addr; 3334 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3), 3335 qman_base_addr, nic_id); 3336 } 3337 3338 /* Enable the QMAN */ 3339 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE); 3340 3341 nic_offset += nic_delta_between_qmans; 3342 if (nic_id & 1) { 3343 nic_offset -= (nic_delta_between_qmans * 2); 3344 nic_offset += nic_delta_between_nics; 3345 } 3346 3347 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id); 3348 } 3349 } 3350 3351 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) 3352 { 3353 struct gaudi_device *gaudi = hdev->asic_specific; 3354 3355 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3356 return; 3357 3358 WREG32(mmDMA0_QM_GLBL_CFG0, 0); 3359 WREG32(mmDMA1_QM_GLBL_CFG0, 0); 3360 WREG32(mmDMA5_QM_GLBL_CFG0, 0); 3361 } 3362 3363 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) 3364 { 3365 struct gaudi_device *gaudi = hdev->asic_specific; 3366 3367 if (!(gaudi->hw_cap_initialized & 
HW_CAP_HBM_DMA)) 3368 return; 3369 3370 WREG32(mmDMA2_QM_GLBL_CFG0, 0); 3371 WREG32(mmDMA3_QM_GLBL_CFG0, 0); 3372 WREG32(mmDMA4_QM_GLBL_CFG0, 0); 3373 WREG32(mmDMA6_QM_GLBL_CFG0, 0); 3374 WREG32(mmDMA7_QM_GLBL_CFG0, 0); 3375 } 3376 3377 static void gaudi_disable_mme_qmans(struct hl_device *hdev) 3378 { 3379 struct gaudi_device *gaudi = hdev->asic_specific; 3380 3381 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3382 return; 3383 3384 WREG32(mmMME2_QM_GLBL_CFG0, 0); 3385 WREG32(mmMME0_QM_GLBL_CFG0, 0); 3386 } 3387 3388 static void gaudi_disable_tpc_qmans(struct hl_device *hdev) 3389 { 3390 struct gaudi_device *gaudi = hdev->asic_specific; 3391 u32 tpc_offset = 0; 3392 int tpc_id; 3393 3394 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3395 return; 3396 3397 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3398 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); 3399 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3400 } 3401 } 3402 3403 static void gaudi_disable_nic_qmans(struct hl_device *hdev) 3404 { 3405 struct gaudi_device *gaudi = hdev->asic_specific; 3406 u32 nic_mask, nic_offset = 0; 3407 u32 nic_delta_between_qmans = 3408 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3409 u32 nic_delta_between_nics = 3410 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3411 int nic_id; 3412 3413 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3414 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id); 3415 3416 if (gaudi->hw_cap_initialized & nic_mask) 3417 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0); 3418 3419 nic_offset += nic_delta_between_qmans; 3420 if (nic_id & 1) { 3421 nic_offset -= (nic_delta_between_qmans * 2); 3422 nic_offset += nic_delta_between_nics; 3423 } 3424 } 3425 } 3426 3427 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) 3428 { 3429 struct gaudi_device *gaudi = hdev->asic_specific; 3430 3431 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3432 return; 3433 3434 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ 3435 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3436 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3437 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3438 } 3439 3440 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) 3441 { 3442 struct gaudi_device *gaudi = hdev->asic_specific; 3443 3444 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3445 return; 3446 3447 /* Stop CPs of HBM DMA QMANs */ 3448 3449 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3450 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3451 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3452 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3453 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3454 } 3455 3456 static void gaudi_stop_mme_qmans(struct hl_device *hdev) 3457 { 3458 struct gaudi_device *gaudi = hdev->asic_specific; 3459 3460 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3461 return; 3462 3463 /* Stop CPs of MME QMANs */ 3464 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3465 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3466 } 3467 3468 static void gaudi_stop_tpc_qmans(struct hl_device *hdev) 3469 { 3470 struct gaudi_device *gaudi = hdev->asic_specific; 3471 3472 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3473 return; 3474 3475 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3476 
WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3477 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3478 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3479 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3480 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3481 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3482 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3483 } 3484 3485 static void gaudi_stop_nic_qmans(struct hl_device *hdev) 3486 { 3487 struct gaudi_device *gaudi = hdev->asic_specific; 3488 3489 /* Stop upper CPs of QMANs */ 3490 3491 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) 3492 WREG32(mmNIC0_QM0_GLBL_CFG1, 3493 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3494 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3495 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3496 3497 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) 3498 WREG32(mmNIC0_QM1_GLBL_CFG1, 3499 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3500 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3501 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3502 3503 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) 3504 WREG32(mmNIC1_QM0_GLBL_CFG1, 3505 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3506 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3507 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3508 3509 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) 3510 WREG32(mmNIC1_QM1_GLBL_CFG1, 3511 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3512 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3513 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3514 3515 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) 3516 WREG32(mmNIC2_QM0_GLBL_CFG1, 3517 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3518 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3519 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3520 3521 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) 3522 WREG32(mmNIC2_QM1_GLBL_CFG1, 3523 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3524 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3525 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3526 3527 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) 3528 WREG32(mmNIC3_QM0_GLBL_CFG1, 3529 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3530 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3531 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3532 3533 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) 3534 WREG32(mmNIC3_QM1_GLBL_CFG1, 3535 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3536 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3537 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3538 3539 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) 3540 WREG32(mmNIC4_QM0_GLBL_CFG1, 3541 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3542 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3543 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3544 3545 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) 3546 WREG32(mmNIC4_QM1_GLBL_CFG1, 3547 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3548 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3549 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3550 } 3551 3552 static void gaudi_pci_dma_stall(struct hl_device *hdev) 3553 { 3554 struct gaudi_device *gaudi = hdev->asic_specific; 3555 3556 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3557 return; 3558 3559 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3560 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3561 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3562 } 3563 3564 static void gaudi_hbm_dma_stall(struct hl_device *hdev) 3565 { 3566 struct gaudi_device *gaudi = hdev->asic_specific; 3567 3568 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3569 return; 3570 3571 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3572 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3573 
WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3574 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3575 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3576 } 3577 3578 static void gaudi_mme_stall(struct hl_device *hdev) 3579 { 3580 struct gaudi_device *gaudi = hdev->asic_specific; 3581 3582 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3583 return; 3584 3585 /* WA for H3-1800 bug: do ACC and SBAB writes twice */ 3586 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3587 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3588 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3589 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3590 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3591 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3592 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3593 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3594 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3595 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3596 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3597 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3598 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3599 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3600 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3601 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3602 } 3603 3604 static void gaudi_tpc_stall(struct hl_device *hdev) 3605 { 3606 struct gaudi_device *gaudi = hdev->asic_specific; 3607 3608 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3609 return; 3610 3611 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3612 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3613 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3614 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3615 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3616 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3617 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3618 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3619 } 3620 3621 static void gaudi_disable_clock_gating(struct hl_device *hdev) 3622 { 3623 u32 qman_offset; 3624 int i; 3625 3626 if (hdev->asic_prop.fw_security_enabled) 3627 return; 3628 3629 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 3630 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); 3631 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); 3632 3633 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); 3634 } 3635 3636 WREG32(mmMME0_QM_CGM_CFG, 0); 3637 WREG32(mmMME0_QM_CGM_CFG1, 0); 3638 WREG32(mmMME2_QM_CGM_CFG, 0); 3639 WREG32(mmMME2_QM_CGM_CFG1, 0); 3640 3641 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 3642 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); 3643 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); 3644 3645 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); 3646 } 3647 } 3648 3649 static void gaudi_enable_timestamp(struct hl_device *hdev) 3650 { 3651 /* Disable the timestamp counter */ 3652 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3653 3654 /* Zero the lower/upper parts of the 64-bit counter */ 3655 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); 3656 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); 3657 3658 /* Enable the counter */ 3659 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); 3660 } 
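/*
 * Illustrative sketch, not driver code: everything in this area is gated on
 * the gaudi->hw_cap_initialized bitmap. Engines with many instances get one
 * bit per instance inside a wider field - TPC via
 * FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id), NIC via
 * 1 << (HW_CAP_NIC_SHIFT + nic_id) - so the stop/stall/disable helpers can
 * both skip groups that were never brought up and know exactly which
 * instances are live. The field layout below is invented for the example;
 * the real masks and shifts come from the driver's private header.
 */
#define EXAMPLE_CAP_TPC_MASK	GENMASK(15, 8)	/* one bit per TPC (example layout) */
#define EXAMPLE_CAP_NIC_SHIFT	16		/* first NIC bit (example layout) */

static inline u32 example_mark_tpc_up(u32 caps, int tpc_id)
{
	/* Same pattern as gaudi_init_tpc_qmans() */
	return caps | FIELD_PREP(EXAMPLE_CAP_TPC_MASK, 1 << tpc_id);
}

static inline u32 example_mark_nic_up(u32 caps, int nic_id)
{
	/* Same pattern as gaudi_init_nic_qmans() */
	return caps | 1 << (EXAMPLE_CAP_NIC_SHIFT + nic_id);
}

static inline bool example_any_tpc_up(u32 caps)
{
	/* Same test the stop/stall/disable helpers use to return early */
	return !!(caps & EXAMPLE_CAP_TPC_MASK);
}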
3661 3662 static void gaudi_disable_timestamp(struct hl_device *hdev) 3663 { 3664 /* Disable the timestamp counter */ 3665 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3666 } 3667 3668 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3669 { 3670 u32 wait_timeout_ms; 3671 3672 if (hdev->pldm) 3673 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 3674 else 3675 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; 3676 3677 if (fw_reset) 3678 goto skip_engines; 3679 3680 gaudi_stop_nic_qmans(hdev); 3681 gaudi_stop_mme_qmans(hdev); 3682 gaudi_stop_tpc_qmans(hdev); 3683 gaudi_stop_hbm_dma_qmans(hdev); 3684 gaudi_stop_pci_dma_qmans(hdev); 3685 3686 msleep(wait_timeout_ms); 3687 3688 gaudi_pci_dma_stall(hdev); 3689 gaudi_hbm_dma_stall(hdev); 3690 gaudi_tpc_stall(hdev); 3691 gaudi_mme_stall(hdev); 3692 3693 msleep(wait_timeout_ms); 3694 3695 gaudi_disable_nic_qmans(hdev); 3696 gaudi_disable_mme_qmans(hdev); 3697 gaudi_disable_tpc_qmans(hdev); 3698 gaudi_disable_hbm_dma_qmans(hdev); 3699 gaudi_disable_pci_dma_qmans(hdev); 3700 3701 gaudi_disable_timestamp(hdev); 3702 3703 skip_engines: 3704 gaudi_disable_msi(hdev); 3705 } 3706 3707 static int gaudi_mmu_init(struct hl_device *hdev) 3708 { 3709 struct asic_fixed_properties *prop = &hdev->asic_prop; 3710 struct gaudi_device *gaudi = hdev->asic_specific; 3711 u64 hop0_addr; 3712 int rc, i; 3713 3714 if (!hdev->mmu_enable) 3715 return 0; 3716 3717 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 3718 return 0; 3719 3720 for (i = 0 ; i < prop->max_asid ; i++) { 3721 hop0_addr = prop->mmu_pgt_addr + 3722 (i * prop->mmu_hop_table_size); 3723 3724 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); 3725 if (rc) { 3726 dev_err(hdev->dev, 3727 "failed to set hop0 addr for asid %d\n", i); 3728 return rc; 3729 } 3730 } 3731 3732 /* init MMU cache manage page */ 3733 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8); 3734 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40); 3735 3736 /* mem cache invalidation */ 3737 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); 3738 3739 rc = hl_mmu_invalidate_cache(hdev, true, 0); 3740 if (rc) 3741 return rc; 3742 3743 WREG32(mmMMU_UP_MMU_ENABLE, 1); 3744 WREG32(mmMMU_UP_SPI_MASK, 0xF); 3745 3746 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440); 3747 3748 /* 3749 * The H/W expects the first PI after init to be 1. After wraparound 3750 * we'll write 0. 
3751 */ 3752 gaudi->mmu_cache_inv_pi = 1; 3753 3754 gaudi->hw_cap_initialized |= HW_CAP_MMU; 3755 3756 return 0; 3757 } 3758 3759 static int gaudi_load_firmware_to_device(struct hl_device *hdev) 3760 { 3761 void __iomem *dst; 3762 3763 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET; 3764 3765 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0); 3766 } 3767 3768 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev) 3769 { 3770 void __iomem *dst; 3771 3772 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET; 3773 3774 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0); 3775 } 3776 3777 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev) 3778 { 3779 struct dynamic_fw_load_mgr *dynamic_loader; 3780 struct cpu_dyn_regs *dyn_regs; 3781 3782 dynamic_loader = &hdev->fw_loader.dynamic_loader; 3783 3784 /* 3785 * here we update initial values for few specific dynamic regs (as 3786 * before reading the first descriptor from FW those value has to be 3787 * hard-coded) in later stages of the protocol those values will be 3788 * updated automatically by reading the FW descriptor so data there 3789 * will always be up-to-date 3790 */ 3791 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; 3792 dyn_regs->kmd_msg_to_cpu = 3793 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); 3794 dyn_regs->cpu_cmd_status_to_host = 3795 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); 3796 3797 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC; 3798 } 3799 3800 static void gaudi_init_static_firmware_loader(struct hl_device *hdev) 3801 { 3802 struct static_fw_load_mgr *static_loader; 3803 3804 static_loader = &hdev->fw_loader.static_loader; 3805 3806 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3807 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3808 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU; 3809 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST; 3810 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3811 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0; 3812 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1; 3813 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0; 3814 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1; 3815 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET; 3816 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET; 3817 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR)); 3818 static_loader->cpu_reset_wait_msec = hdev->pldm ? 
3819 GAUDI_PLDM_RESET_WAIT_MSEC : 3820 GAUDI_CPU_RESET_WAIT_MSEC; 3821 } 3822 3823 static void gaudi_init_firmware_preload_params(struct hl_device *hdev) 3824 { 3825 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 3826 3827 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3828 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 3829 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 3830 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 3831 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 3832 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3833 } 3834 3835 static void gaudi_init_firmware_loader(struct hl_device *hdev) 3836 { 3837 struct asic_fixed_properties *prop = &hdev->asic_prop; 3838 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 3839 3840 /* fill common fields */ 3841 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 3842 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE; 3843 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE; 3844 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC; 3845 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3846 fw_loader->skip_bmc = !hdev->bmc_enable; 3847 fw_loader->sram_bar_id = SRAM_BAR_ID; 3848 fw_loader->dram_bar_id = HBM_BAR_ID; 3849 3850 if (prop->dynamic_fw_load) 3851 gaudi_init_dynamic_firmware_loader(hdev); 3852 else 3853 gaudi_init_static_firmware_loader(hdev); 3854 } 3855 3856 static int gaudi_init_cpu(struct hl_device *hdev) 3857 { 3858 struct gaudi_device *gaudi = hdev->asic_specific; 3859 int rc; 3860 3861 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 3862 return 0; 3863 3864 if (gaudi->hw_cap_initialized & HW_CAP_CPU) 3865 return 0; 3866 3867 /* 3868 * The device CPU works with 40 bits addresses. 3869 * This register sets the extension to 50 bits. 
3870 */ 3871 if (!hdev->asic_prop.fw_security_enabled) 3872 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 3873 3874 rc = hl_fw_init_cpu(hdev); 3875 3876 if (rc) 3877 return rc; 3878 3879 gaudi->hw_cap_initialized |= HW_CAP_CPU; 3880 3881 return 0; 3882 } 3883 3884 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3885 { 3886 struct cpu_dyn_regs *dyn_regs = 3887 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3888 struct asic_fixed_properties *prop = &hdev->asic_prop; 3889 struct gaudi_device *gaudi = hdev->asic_specific; 3890 u32 status, irq_handler_offset; 3891 struct hl_eq *eq; 3892 struct hl_hw_queue *cpu_pq = 3893 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 3894 int err; 3895 3896 if (!hdev->cpu_queues_enable) 3897 return 0; 3898 3899 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 3900 return 0; 3901 3902 eq = &hdev->event_queue; 3903 3904 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 3905 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 3906 3907 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 3908 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 3909 3910 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 3911 lower_32_bits(hdev->cpu_accessible_dma_address)); 3912 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 3913 upper_32_bits(hdev->cpu_accessible_dma_address)); 3914 3915 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 3916 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 3917 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 3918 3919 /* Used for EQ CI */ 3920 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 3921 3922 WREG32(mmCPU_IF_PF_PQ_PI, 0); 3923 3924 if (gaudi->multi_msi_mode) 3925 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); 3926 else 3927 WREG32(mmCPU_IF_QUEUE_INIT, 3928 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 3929 3930 irq_handler_offset = prop->gic_interrupts_enable ? 3931 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3932 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 3933 3934 WREG32(irq_handler_offset, 3935 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 3936 3937 err = hl_poll_timeout( 3938 hdev, 3939 mmCPU_IF_QUEUE_INIT, 3940 status, 3941 (status == PQ_INIT_STATUS_READY_FOR_HOST), 3942 1000, 3943 cpu_timeout); 3944 3945 if (err) { 3946 dev_err(hdev->dev, 3947 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 3948 return -EIO; 3949 } 3950 3951 /* update FW application security bits */ 3952 if (prop->fw_cpu_boot_dev_sts0_valid) 3953 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 3954 if (prop->fw_cpu_boot_dev_sts1_valid) 3955 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 3956 3957 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 3958 return 0; 3959 } 3960 3961 static void gaudi_pre_hw_init(struct hl_device *hdev) 3962 { 3963 /* Perform read from the device to make sure device is up */ 3964 RREG32(mmHW_STATE); 3965 3966 if (!hdev->asic_prop.fw_security_enabled) { 3967 /* Set the access through PCI bars (Linux driver only) as 3968 * secured 3969 */ 3970 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 3971 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 3972 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 3973 3974 /* Perform read to flush the waiting writes to ensure 3975 * configuration was set in the device 3976 */ 3977 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 3978 } 3979 3980 /* 3981 * Let's mark in the H/W that we have reached this point. We check 3982 * this value in the reset_before_init function to understand whether 3983 * we need to reset the chip before doing H/W init. 
This register is 3984 * cleared by the H/W upon H/W reset 3985 */ 3986 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 3987 } 3988 3989 static int gaudi_hw_init(struct hl_device *hdev) 3990 { 3991 struct gaudi_device *gaudi = hdev->asic_specific; 3992 int rc; 3993 3994 gaudi_pre_hw_init(hdev); 3995 3996 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 3997 * So we set it here and if anyone tries to move it later to 3998 * a different address, there will be an error 3999 */ 4000 if (hdev->asic_prop.iatu_done_by_fw) 4001 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE; 4002 4003 /* 4004 * Before pushing u-boot/linux to device, need to set the hbm bar to 4005 * base address of dram 4006 */ 4007 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 4008 dev_err(hdev->dev, 4009 "failed to map HBM bar to DRAM base address\n"); 4010 return -EIO; 4011 } 4012 4013 rc = gaudi_init_cpu(hdev); 4014 if (rc) { 4015 dev_err(hdev->dev, "failed to initialize CPU\n"); 4016 return rc; 4017 } 4018 4019 /* In case the clock gating was enabled in preboot we need to disable 4020 * it here before touching the MME/TPC registers. 4021 */ 4022 gaudi_disable_clock_gating(hdev); 4023 4024 /* SRAM scrambler must be initialized after CPU is running from HBM */ 4025 gaudi_init_scrambler_sram(hdev); 4026 4027 /* This is here just in case we are working without CPU */ 4028 gaudi_init_scrambler_hbm(hdev); 4029 4030 gaudi_init_golden_registers(hdev); 4031 4032 rc = gaudi_mmu_init(hdev); 4033 if (rc) 4034 return rc; 4035 4036 gaudi_init_security(hdev); 4037 4038 gaudi_init_pci_dma_qmans(hdev); 4039 4040 gaudi_init_hbm_dma_qmans(hdev); 4041 4042 gaudi_init_mme_qmans(hdev); 4043 4044 gaudi_init_tpc_qmans(hdev); 4045 4046 gaudi_init_nic_qmans(hdev); 4047 4048 gaudi_enable_timestamp(hdev); 4049 4050 /* MSI must be enabled before CPU queues and NIC are initialized */ 4051 rc = gaudi_enable_msi(hdev); 4052 if (rc) 4053 goto disable_queues; 4054 4055 /* must be called after MSI was enabled */ 4056 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 4057 if (rc) { 4058 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 4059 rc); 4060 goto disable_msi; 4061 } 4062 4063 /* Perform read from the device to flush all configuration */ 4064 RREG32(mmHW_STATE); 4065 4066 return 0; 4067 4068 disable_msi: 4069 gaudi_disable_msi(hdev); 4070 disable_queues: 4071 gaudi_disable_mme_qmans(hdev); 4072 gaudi_disable_pci_dma_qmans(hdev); 4073 4074 return rc; 4075 } 4076 4077 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4078 { 4079 struct cpu_dyn_regs *dyn_regs = 4080 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4081 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset; 4082 struct gaudi_device *gaudi = hdev->asic_specific; 4083 bool driver_performs_reset; 4084 4085 if (!hard_reset) { 4086 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 4087 return 0; 4088 } 4089 4090 if (hdev->pldm) { 4091 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 4092 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 4093 } else { 4094 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; 4095 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 4096 } 4097 4098 if (fw_reset) { 4099 dev_dbg(hdev->dev, 4100 "Firmware performs HARD reset, going to wait %dms\n", 4101 reset_timeout_ms); 4102 4103 goto skip_reset; 4104 } 4105 4106 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && 4107 !hdev->asic_prop.hard_reset_done_by_fw); 4108 4109 /* Set device to handle FLR by H/W as we 
will put the device CPU to 4110 * halt mode 4111 */ 4112 if (driver_performs_reset) 4113 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 4114 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 4115 4116 /* If linux is loaded in the device CPU we need to communicate with it 4117 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU 4118 * registers in case of old F/Ws 4119 */ 4120 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { 4121 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4122 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4123 le32_to_cpu(dyn_regs->gic_host_halt_irq); 4124 4125 WREG32(irq_handler_offset, 4126 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); 4127 4128 /* This is a hail-mary attempt to revive the card in the small chance that the 4129 * f/w has experienced a watchdog event, which caused it to return back to preboot. 4130 * In that case, triggering reset through GIC won't help. We need to trigger the 4131 * reset as if Linux wasn't loaded. 4132 * 4133 * We do it only if the reset cause was HB, because that would be the indication 4134 * of such an event. 4135 * 4136 * In case watchdog hasn't expired but we still got HB, then this won't do any 4137 * damage. 4138 */ 4139 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { 4140 if (hdev->asic_prop.hard_reset_done_by_fw) 4141 hl_fw_ask_hard_reset_without_linux(hdev); 4142 else 4143 hl_fw_ask_halt_machine_without_linux(hdev); 4144 } 4145 } else { 4146 if (hdev->asic_prop.hard_reset_done_by_fw) 4147 hl_fw_ask_hard_reset_without_linux(hdev); 4148 else 4149 hl_fw_ask_halt_machine_without_linux(hdev); 4150 } 4151 4152 if (driver_performs_reset) { 4153 4154 /* Configure the reset registers. Must be done as early as 4155 * possible in case we fail during H/W initialization 4156 */ 4157 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 4158 (CFG_RST_H_DMA_MASK | 4159 CFG_RST_H_MME_MASK | 4160 CFG_RST_H_SM_MASK | 4161 CFG_RST_H_TPC_7_MASK)); 4162 4163 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 4164 4165 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 4166 (CFG_RST_H_HBM_MASK | 4167 CFG_RST_H_TPC_7_MASK | 4168 CFG_RST_H_NIC_MASK | 4169 CFG_RST_H_SM_MASK | 4170 CFG_RST_H_DMA_MASK | 4171 CFG_RST_H_MME_MASK | 4172 CFG_RST_H_CPU_MASK | 4173 CFG_RST_H_MMU_MASK)); 4174 4175 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 4176 (CFG_RST_L_IF_MASK | 4177 CFG_RST_L_PSOC_MASK | 4178 CFG_RST_L_TPC_MASK)); 4179 4180 msleep(cpu_timeout_ms); 4181 4182 /* Tell ASIC not to re-initialize PCIe */ 4183 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 4184 4185 /* Restart BTL/BLR upon hard-reset */ 4186 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 4187 4188 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 4189 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 4190 4191 dev_dbg(hdev->dev, 4192 "Issued HARD reset command, going to wait %dms\n", 4193 reset_timeout_ms); 4194 } else { 4195 dev_dbg(hdev->dev, 4196 "Firmware performs HARD reset, going to wait %dms\n", 4197 reset_timeout_ms); 4198 } 4199 4200 skip_reset: 4201 /* 4202 * After hard reset, we can't poll the BTM_FSM register because the PSOC 4203 * itself is in reset. 
Need to wait until the reset is deasserted 4204 */ 4205 msleep(reset_timeout_ms); 4206 4207 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 4208 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) { 4209 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status); 4210 return -ETIMEDOUT; 4211 } 4212 4213 if (gaudi) { 4214 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | 4215 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | 4216 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | 4217 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | 4218 HW_CAP_HBM_SCRAMBLER); 4219 4220 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 4221 4222 hdev->device_cpu_is_halted = false; 4223 } 4224 return 0; 4225 } 4226 4227 static int gaudi_suspend(struct hl_device *hdev) 4228 { 4229 int rc; 4230 4231 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 4232 if (rc) 4233 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 4234 4235 return rc; 4236 } 4237 4238 static int gaudi_resume(struct hl_device *hdev) 4239 { 4240 return gaudi_init_iatu(hdev); 4241 } 4242 4243 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 4244 void *cpu_addr, dma_addr_t dma_addr, size_t size) 4245 { 4246 int rc; 4247 4248 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4249 VM_DONTCOPY | VM_NORESERVE); 4250 4251 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4252 (dma_addr - HOST_PHYS_BASE), size); 4253 if (rc) 4254 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4255 4256 return rc; 4257 } 4258 4259 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 4260 { 4261 struct cpu_dyn_regs *dyn_regs = 4262 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4263 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset; 4264 struct gaudi_device *gaudi = hdev->asic_specific; 4265 bool invalid_queue = false; 4266 int dma_id; 4267 4268 switch (hw_queue_id) { 4269 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 4270 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 4271 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4272 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4273 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4274 break; 4275 4276 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 4277 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 4278 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4279 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4280 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4281 break; 4282 4283 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 4284 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 4285 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4286 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4287 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4288 break; 4289 4290 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 4291 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 4292 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4293 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4294 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4295 break; 4296 4297 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 4298 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 4299 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4300 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4301 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4302 break; 4303 4304 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 4305 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 4306 dma_qm_offset = dma_id * 
DMA_QMAN_OFFSET; 4307 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4308 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4309 break; 4310 4311 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 4312 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 4313 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4314 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4315 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4316 break; 4317 4318 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 4319 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6]; 4320 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4321 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4322 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4323 break; 4324 4325 case GAUDI_QUEUE_ID_CPU_PQ: 4326 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4327 db_reg_offset = mmCPU_IF_PF_PQ_PI; 4328 else 4329 invalid_queue = true; 4330 break; 4331 4332 case GAUDI_QUEUE_ID_MME_0_0: 4333 db_reg_offset = mmMME2_QM_PQ_PI_0; 4334 break; 4335 4336 case GAUDI_QUEUE_ID_MME_0_1: 4337 db_reg_offset = mmMME2_QM_PQ_PI_1; 4338 break; 4339 4340 case GAUDI_QUEUE_ID_MME_0_2: 4341 db_reg_offset = mmMME2_QM_PQ_PI_2; 4342 break; 4343 4344 case GAUDI_QUEUE_ID_MME_0_3: 4345 db_reg_offset = mmMME2_QM_PQ_PI_3; 4346 break; 4347 4348 case GAUDI_QUEUE_ID_MME_1_0: 4349 db_reg_offset = mmMME0_QM_PQ_PI_0; 4350 break; 4351 4352 case GAUDI_QUEUE_ID_MME_1_1: 4353 db_reg_offset = mmMME0_QM_PQ_PI_1; 4354 break; 4355 4356 case GAUDI_QUEUE_ID_MME_1_2: 4357 db_reg_offset = mmMME0_QM_PQ_PI_2; 4358 break; 4359 4360 case GAUDI_QUEUE_ID_MME_1_3: 4361 db_reg_offset = mmMME0_QM_PQ_PI_3; 4362 break; 4363 4364 case GAUDI_QUEUE_ID_TPC_0_0: 4365 db_reg_offset = mmTPC0_QM_PQ_PI_0; 4366 break; 4367 4368 case GAUDI_QUEUE_ID_TPC_0_1: 4369 db_reg_offset = mmTPC0_QM_PQ_PI_1; 4370 break; 4371 4372 case GAUDI_QUEUE_ID_TPC_0_2: 4373 db_reg_offset = mmTPC0_QM_PQ_PI_2; 4374 break; 4375 4376 case GAUDI_QUEUE_ID_TPC_0_3: 4377 db_reg_offset = mmTPC0_QM_PQ_PI_3; 4378 break; 4379 4380 case GAUDI_QUEUE_ID_TPC_1_0: 4381 db_reg_offset = mmTPC1_QM_PQ_PI_0; 4382 break; 4383 4384 case GAUDI_QUEUE_ID_TPC_1_1: 4385 db_reg_offset = mmTPC1_QM_PQ_PI_1; 4386 break; 4387 4388 case GAUDI_QUEUE_ID_TPC_1_2: 4389 db_reg_offset = mmTPC1_QM_PQ_PI_2; 4390 break; 4391 4392 case GAUDI_QUEUE_ID_TPC_1_3: 4393 db_reg_offset = mmTPC1_QM_PQ_PI_3; 4394 break; 4395 4396 case GAUDI_QUEUE_ID_TPC_2_0: 4397 db_reg_offset = mmTPC2_QM_PQ_PI_0; 4398 break; 4399 4400 case GAUDI_QUEUE_ID_TPC_2_1: 4401 db_reg_offset = mmTPC2_QM_PQ_PI_1; 4402 break; 4403 4404 case GAUDI_QUEUE_ID_TPC_2_2: 4405 db_reg_offset = mmTPC2_QM_PQ_PI_2; 4406 break; 4407 4408 case GAUDI_QUEUE_ID_TPC_2_3: 4409 db_reg_offset = mmTPC2_QM_PQ_PI_3; 4410 break; 4411 4412 case GAUDI_QUEUE_ID_TPC_3_0: 4413 db_reg_offset = mmTPC3_QM_PQ_PI_0; 4414 break; 4415 4416 case GAUDI_QUEUE_ID_TPC_3_1: 4417 db_reg_offset = mmTPC3_QM_PQ_PI_1; 4418 break; 4419 4420 case GAUDI_QUEUE_ID_TPC_3_2: 4421 db_reg_offset = mmTPC3_QM_PQ_PI_2; 4422 break; 4423 4424 case GAUDI_QUEUE_ID_TPC_3_3: 4425 db_reg_offset = mmTPC3_QM_PQ_PI_3; 4426 break; 4427 4428 case GAUDI_QUEUE_ID_TPC_4_0: 4429 db_reg_offset = mmTPC4_QM_PQ_PI_0; 4430 break; 4431 4432 case GAUDI_QUEUE_ID_TPC_4_1: 4433 db_reg_offset = mmTPC4_QM_PQ_PI_1; 4434 break; 4435 4436 case GAUDI_QUEUE_ID_TPC_4_2: 4437 db_reg_offset = mmTPC4_QM_PQ_PI_2; 4438 break; 4439 4440 case GAUDI_QUEUE_ID_TPC_4_3: 4441 db_reg_offset = mmTPC4_QM_PQ_PI_3; 4442 break; 4443 4444 case GAUDI_QUEUE_ID_TPC_5_0: 4445 db_reg_offset = mmTPC5_QM_PQ_PI_0; 4446 break; 4447 4448 case 
GAUDI_QUEUE_ID_TPC_5_1: 4449 db_reg_offset = mmTPC5_QM_PQ_PI_1; 4450 break; 4451 4452 case GAUDI_QUEUE_ID_TPC_5_2: 4453 db_reg_offset = mmTPC5_QM_PQ_PI_2; 4454 break; 4455 4456 case GAUDI_QUEUE_ID_TPC_5_3: 4457 db_reg_offset = mmTPC5_QM_PQ_PI_3; 4458 break; 4459 4460 case GAUDI_QUEUE_ID_TPC_6_0: 4461 db_reg_offset = mmTPC6_QM_PQ_PI_0; 4462 break; 4463 4464 case GAUDI_QUEUE_ID_TPC_6_1: 4465 db_reg_offset = mmTPC6_QM_PQ_PI_1; 4466 break; 4467 4468 case GAUDI_QUEUE_ID_TPC_6_2: 4469 db_reg_offset = mmTPC6_QM_PQ_PI_2; 4470 break; 4471 4472 case GAUDI_QUEUE_ID_TPC_6_3: 4473 db_reg_offset = mmTPC6_QM_PQ_PI_3; 4474 break; 4475 4476 case GAUDI_QUEUE_ID_TPC_7_0: 4477 db_reg_offset = mmTPC7_QM_PQ_PI_0; 4478 break; 4479 4480 case GAUDI_QUEUE_ID_TPC_7_1: 4481 db_reg_offset = mmTPC7_QM_PQ_PI_1; 4482 break; 4483 4484 case GAUDI_QUEUE_ID_TPC_7_2: 4485 db_reg_offset = mmTPC7_QM_PQ_PI_2; 4486 break; 4487 4488 case GAUDI_QUEUE_ID_TPC_7_3: 4489 db_reg_offset = mmTPC7_QM_PQ_PI_3; 4490 break; 4491 4492 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3: 4493 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0)) 4494 invalid_queue = true; 4495 4496 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4497 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off; 4498 break; 4499 4500 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3: 4501 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1)) 4502 invalid_queue = true; 4503 4504 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4505 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off; 4506 break; 4507 4508 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3: 4509 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2)) 4510 invalid_queue = true; 4511 4512 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4513 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off; 4514 break; 4515 4516 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3: 4517 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3)) 4518 invalid_queue = true; 4519 4520 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4521 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off; 4522 break; 4523 4524 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3: 4525 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4)) 4526 invalid_queue = true; 4527 4528 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4529 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off; 4530 break; 4531 4532 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3: 4533 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5)) 4534 invalid_queue = true; 4535 4536 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4537 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off; 4538 break; 4539 4540 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3: 4541 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6)) 4542 invalid_queue = true; 4543 4544 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4545 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off; 4546 break; 4547 4548 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3: 4549 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7)) 4550 invalid_queue = true; 4551 4552 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4553 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off; 4554 break; 4555 4556 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3: 4557 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8)) 4558 invalid_queue = true; 4559 4560 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4561 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off; 4562 break; 4563 4564 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3: 4565 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9)) 4566 invalid_queue = true; 4567 4568 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4569 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off; 4570 break; 4571 4572 default: 4573 
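		/*
		 * Every queue this driver rings a doorbell for is mapped to
		 * its PQ_PI register explicitly above; any other ID is
		 * flagged here and rejected below.
		 */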
invalid_queue = true; 4574 } 4575 4576 if (invalid_queue) { 4577 /* Should never get here */ 4578 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", 4579 hw_queue_id); 4580 return; 4581 } 4582 4583 db_value = pi; 4584 4585 /* ring the doorbell */ 4586 WREG32(db_reg_offset, db_value); 4587 4588 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { 4589 /* make sure device CPU will read latest data from host */ 4590 mb(); 4591 4592 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4593 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4594 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 4595 4596 WREG32(irq_handler_offset, 4597 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 4598 } 4599 } 4600 4601 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, 4602 struct hl_bd *bd) 4603 { 4604 __le64 *pbd = (__le64 *) bd; 4605 4606 /* The QMANs are on the host memory so a simple copy suffice */ 4607 pqe[0] = pbd[0]; 4608 pqe[1] = pbd[1]; 4609 } 4610 4611 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, 4612 dma_addr_t *dma_handle, gfp_t flags) 4613 { 4614 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, 4615 dma_handle, flags); 4616 4617 /* Shift to the device's base physical address of host memory */ 4618 if (kernel_addr) 4619 *dma_handle += HOST_PHYS_BASE; 4620 4621 return kernel_addr; 4622 } 4623 4624 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, 4625 void *cpu_addr, dma_addr_t dma_handle) 4626 { 4627 /* Cancel the device's base physical address of host memory */ 4628 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; 4629 4630 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); 4631 } 4632 4633 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) 4634 { 4635 struct asic_fixed_properties *prop = &hdev->asic_prop; 4636 u64 cur_addr = prop->dram_user_base_address; 4637 u32 chunk_size, busy; 4638 int rc, dma_id; 4639 4640 while (cur_addr < prop->dram_end_address) { 4641 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4642 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4643 4644 chunk_size = 4645 min((u64)SZ_2G, prop->dram_end_address - cur_addr); 4646 4647 dev_dbg(hdev->dev, 4648 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", 4649 cur_addr, cur_addr + chunk_size); 4650 4651 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 4652 lower_32_bits(val)); 4653 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 4654 upper_32_bits(val)); 4655 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, 4656 lower_32_bits(cur_addr)); 4657 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, 4658 upper_32_bits(cur_addr)); 4659 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, 4660 chunk_size); 4661 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 4662 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) | 4663 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT))); 4664 4665 cur_addr += chunk_size; 4666 4667 if (cur_addr == prop->dram_end_address) 4668 break; 4669 } 4670 4671 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4672 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4673 4674 rc = hl_poll_timeout( 4675 hdev, 4676 mmDMA0_CORE_STS0 + dma_offset, 4677 busy, 4678 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 4679 1000, 4680 HBM_SCRUBBING_TIMEOUT_US); 4681 4682 if (rc) { 4683 dev_err(hdev->dev, 4684 "DMA Timeout during HBM scrubbing of DMA #%d\n", 4685 dma_id); 4686 return -EIO; 4687 } 4688 } 4689 } 4690 4691 return 0; 4692 } 4693 4694 static int gaudi_scrub_device_mem(struct hl_device *hdev) 4695 { 4696 struct asic_fixed_properties *prop = 
&hdev->asic_prop; 4697 u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US : 4698 min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US); 4699 u64 addr, size, val = hdev->memory_scrub_val; 4700 ktime_t timeout; 4701 int rc = 0; 4702 4703 if (!hdev->memory_scrub) 4704 return 0; 4705 4706 timeout = ktime_add_us(ktime_get(), wait_to_idle_time); 4707 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 4708 if (ktime_compare(ktime_get(), timeout) > 0) { 4709 dev_err(hdev->dev, "waiting for idle timeout\n"); 4710 return -ETIMEDOUT; 4711 } 4712 usleep_range((1000 >> 2) + 1, 1000); 4713 } 4714 4715 /* Scrub SRAM */ 4716 addr = prop->sram_user_base_address; 4717 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; 4718 4719 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4720 addr, addr + size, val); 4721 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4722 if (rc) { 4723 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc); 4724 return rc; 4725 } 4726 4727 /* Scrub HBM using all DMA channels in parallel */ 4728 rc = gaudi_scrub_device_dram(hdev, val); 4729 if (rc) { 4730 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc); 4731 return rc; 4732 } 4733 4734 return 0; 4735 } 4736 4737 static void *gaudi_get_int_queue_base(struct hl_device *hdev, 4738 u32 queue_id, dma_addr_t *dma_handle, 4739 u16 *queue_len) 4740 { 4741 struct gaudi_device *gaudi = hdev->asic_specific; 4742 struct gaudi_internal_qman_info *q; 4743 4744 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 4745 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 4746 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 4747 return NULL; 4748 } 4749 4750 q = &gaudi->internal_qmans[queue_id]; 4751 *dma_handle = q->pq_dma_addr; 4752 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; 4753 4754 return q->pq_kernel_addr; 4755 } 4756 4757 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 4758 u16 len, u32 timeout, u64 *result) 4759 { 4760 struct gaudi_device *gaudi = hdev->asic_specific; 4761 4762 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 4763 if (result) 4764 *result = 0; 4765 return 0; 4766 } 4767 4768 if (!timeout) 4769 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 4770 4771 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 4772 timeout, result); 4773 } 4774 4775 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 4776 { 4777 struct packet_msg_prot *fence_pkt; 4778 dma_addr_t pkt_dma_addr; 4779 u32 fence_val, tmp, timeout_usec; 4780 dma_addr_t fence_dma_addr; 4781 u32 *fence_ptr; 4782 int rc; 4783 4784 if (hdev->pldm) 4785 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 4786 else 4787 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 4788 4789 fence_val = GAUDI_QMAN0_FENCE_VAL; 4790 4791 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 4792 if (!fence_ptr) { 4793 dev_err(hdev->dev, 4794 "Failed to allocate memory for H/W queue %d testing\n", 4795 hw_queue_id); 4796 return -ENOMEM; 4797 } 4798 4799 *fence_ptr = 0; 4800 4801 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL, 4802 &pkt_dma_addr); 4803 if (!fence_pkt) { 4804 dev_err(hdev->dev, 4805 "Failed to allocate packet for H/W queue %d testing\n", 4806 hw_queue_id); 4807 rc = -ENOMEM; 4808 goto free_fence_ptr; 4809 } 4810 4811 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4812 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4813 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4814 4815 fence_pkt->ctl 
= cpu_to_le32(tmp); 4816 fence_pkt->value = cpu_to_le32(fence_val); 4817 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4818 4819 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 4820 sizeof(struct packet_msg_prot), 4821 pkt_dma_addr); 4822 if (rc) { 4823 dev_err(hdev->dev, 4824 "Failed to send fence packet to H/W queue %d\n", 4825 hw_queue_id); 4826 goto free_pkt; 4827 } 4828 4829 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 4830 1000, timeout_usec, true); 4831 4832 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 4833 4834 if (rc == -ETIMEDOUT) { 4835 dev_err(hdev->dev, 4836 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 4837 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 4838 rc = -EIO; 4839 } 4840 4841 free_pkt: 4842 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr); 4843 free_fence_ptr: 4844 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 4845 return rc; 4846 } 4847 4848 static int gaudi_test_cpu_queue(struct hl_device *hdev) 4849 { 4850 struct gaudi_device *gaudi = hdev->asic_specific; 4851 4852 /* 4853 * check capability here as send_cpu_message() won't update the result 4854 * value if no capability 4855 */ 4856 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 4857 return 0; 4858 4859 return hl_fw_test_cpu_queue(hdev); 4860 } 4861 4862 static int gaudi_test_queues(struct hl_device *hdev) 4863 { 4864 int i, rc, ret_val = 0; 4865 4866 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 4867 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 4868 rc = gaudi_test_queue(hdev, i); 4869 if (rc) 4870 ret_val = -EINVAL; 4871 } 4872 } 4873 4874 rc = gaudi_test_cpu_queue(hdev); 4875 if (rc) 4876 ret_val = -EINVAL; 4877 4878 return ret_val; 4879 } 4880 4881 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 4882 gfp_t mem_flags, dma_addr_t *dma_handle) 4883 { 4884 void *kernel_addr; 4885 4886 if (size > GAUDI_DMA_POOL_BLK_SIZE) 4887 return NULL; 4888 4889 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 4890 4891 /* Shift to the device's base physical address of host memory */ 4892 if (kernel_addr) 4893 *dma_handle += HOST_PHYS_BASE; 4894 4895 return kernel_addr; 4896 } 4897 4898 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 4899 dma_addr_t dma_addr) 4900 { 4901 /* Cancel the device's base physical address of host memory */ 4902 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 4903 4904 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 4905 } 4906 4907 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 4908 size_t size, dma_addr_t *dma_handle) 4909 { 4910 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 4911 } 4912 4913 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 4914 size_t size, void *vaddr) 4915 { 4916 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 4917 } 4918 4919 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) 4920 { 4921 struct scatterlist *sg, *sg_next_iter; 4922 u32 count, dma_desc_cnt; 4923 u64 len, len_next; 4924 dma_addr_t addr, addr_next; 4925 4926 dma_desc_cnt = 0; 4927 4928 for_each_sgtable_dma_sg(sgt, sg, count) { 4929 len = sg_dma_len(sg); 4930 addr = sg_dma_address(sg); 4931 4932 if (len == 0) 4933 break; 4934 4935 while ((count + 1) < sgt->nents) { 4936 sg_next_iter = sg_next(sg); 4937 len_next = sg_dma_len(sg_next_iter); 4938 addr_next = sg_dma_address(sg_next_iter); 4939 4940 if (len_next == 0) 4941 
break; 4942 4943 if ((addr + len == addr_next) && 4944 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 4945 len += len_next; 4946 count++; 4947 sg = sg_next_iter; 4948 } else { 4949 break; 4950 } 4951 } 4952 4953 dma_desc_cnt++; 4954 } 4955 4956 return dma_desc_cnt * sizeof(struct packet_lin_dma); 4957 } 4958 4959 static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 4960 struct hl_cs_parser *parser, 4961 struct packet_lin_dma *user_dma_pkt, 4962 u64 addr, enum dma_data_direction dir) 4963 { 4964 struct hl_userptr *userptr; 4965 int rc; 4966 4967 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4968 parser->job_userptr_list, &userptr)) 4969 goto already_pinned; 4970 4971 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 4972 if (!userptr) 4973 return -ENOMEM; 4974 4975 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4976 userptr); 4977 if (rc) 4978 goto free_userptr; 4979 4980 list_add_tail(&userptr->job_node, parser->job_userptr_list); 4981 4982 rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); 4983 if (rc) { 4984 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 4985 goto unpin_memory; 4986 } 4987 4988 userptr->dma_mapped = true; 4989 userptr->dir = dir; 4990 4991 already_pinned: 4992 parser->patched_cb_size += 4993 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 4994 4995 return 0; 4996 4997 unpin_memory: 4998 list_del(&userptr->job_node); 4999 hl_unpin_host_memory(hdev, userptr); 5000 free_userptr: 5001 kfree(userptr); 5002 return rc; 5003 } 5004 5005 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, 5006 struct hl_cs_parser *parser, 5007 struct packet_lin_dma *user_dma_pkt, 5008 bool src_in_host) 5009 { 5010 enum dma_data_direction dir; 5011 bool skip_host_mem_pin = false, user_memset; 5012 u64 addr; 5013 int rc = 0; 5014 5015 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 5016 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5017 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5018 5019 if (src_in_host) { 5020 if (user_memset) 5021 skip_host_mem_pin = true; 5022 5023 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 5024 dir = DMA_TO_DEVICE; 5025 addr = le64_to_cpu(user_dma_pkt->src_addr); 5026 } else { 5027 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 5028 dir = DMA_FROM_DEVICE; 5029 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 5030 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 5031 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 5032 } 5033 5034 if (skip_host_mem_pin) 5035 parser->patched_cb_size += sizeof(*user_dma_pkt); 5036 else 5037 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 5038 addr, dir); 5039 5040 return rc; 5041 } 5042 5043 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 5044 struct hl_cs_parser *parser, 5045 struct packet_lin_dma *user_dma_pkt) 5046 { 5047 bool src_in_host = false; 5048 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 5049 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 5050 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 5051 5052 dev_dbg(hdev->dev, "DMA packet details:\n"); 5053 dev_dbg(hdev->dev, "source == 0x%llx\n", 5054 le64_to_cpu(user_dma_pkt->src_addr)); 5055 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 5056 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); 5057 5058 /* 5059 * Special handling for DMA with size 0. 
Bypass all validations 5060 * because no transactions will be done except for WR_COMP, which 5061 * is not a security issue 5062 */ 5063 if (!le32_to_cpu(user_dma_pkt->tsize)) { 5064 parser->patched_cb_size += sizeof(*user_dma_pkt); 5065 return 0; 5066 } 5067 5068 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5069 src_in_host = true; 5070 5071 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 5072 src_in_host); 5073 } 5074 5075 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 5076 struct hl_cs_parser *parser, 5077 struct packet_load_and_exe *user_pkt) 5078 { 5079 u32 cfg; 5080 5081 cfg = le32_to_cpu(user_pkt->cfg); 5082 5083 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 5084 dev_err(hdev->dev, 5085 "User not allowed to use Load and Execute\n"); 5086 return -EPERM; 5087 } 5088 5089 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 5090 5091 return 0; 5092 } 5093 5094 static int gaudi_validate_cb(struct hl_device *hdev, 5095 struct hl_cs_parser *parser, bool is_mmu) 5096 { 5097 u32 cb_parsed_length = 0; 5098 int rc = 0; 5099 5100 parser->patched_cb_size = 0; 5101 5102 /* cb_user_size is more than 0 so loop will always be executed */ 5103 while (cb_parsed_length < parser->user_cb_size) { 5104 enum packet_id pkt_id; 5105 u16 pkt_size; 5106 struct gaudi_packet *user_pkt; 5107 5108 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5109 5110 pkt_id = (enum packet_id) ( 5111 (le64_to_cpu(user_pkt->header) & 5112 PACKET_HEADER_PACKET_ID_MASK) >> 5113 PACKET_HEADER_PACKET_ID_SHIFT); 5114 5115 if (!validate_packet_id(pkt_id)) { 5116 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5117 rc = -EINVAL; 5118 break; 5119 } 5120 5121 pkt_size = gaudi_packet_sizes[pkt_id]; 5122 cb_parsed_length += pkt_size; 5123 if (cb_parsed_length > parser->user_cb_size) { 5124 dev_err(hdev->dev, 5125 "packet 0x%x is out of CB boundary\n", pkt_id); 5126 rc = -EINVAL; 5127 break; 5128 } 5129 5130 switch (pkt_id) { 5131 case PACKET_MSG_PROT: 5132 dev_err(hdev->dev, 5133 "User not allowed to use MSG_PROT\n"); 5134 rc = -EPERM; 5135 break; 5136 5137 case PACKET_CP_DMA: 5138 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5139 rc = -EPERM; 5140 break; 5141 5142 case PACKET_STOP: 5143 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5144 rc = -EPERM; 5145 break; 5146 5147 case PACKET_WREG_BULK: 5148 dev_err(hdev->dev, 5149 "User not allowed to use WREG_BULK\n"); 5150 rc = -EPERM; 5151 break; 5152 5153 case PACKET_LOAD_AND_EXE: 5154 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 5155 (struct packet_load_and_exe *) user_pkt); 5156 break; 5157 5158 case PACKET_LIN_DMA: 5159 parser->contains_dma_pkt = true; 5160 if (is_mmu) 5161 parser->patched_cb_size += pkt_size; 5162 else 5163 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 5164 (struct packet_lin_dma *) user_pkt); 5165 break; 5166 5167 case PACKET_WREG_32: 5168 case PACKET_MSG_LONG: 5169 case PACKET_MSG_SHORT: 5170 case PACKET_REPEAT: 5171 case PACKET_FENCE: 5172 case PACKET_NOP: 5173 case PACKET_ARB_POINT: 5174 parser->patched_cb_size += pkt_size; 5175 break; 5176 5177 default: 5178 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5179 pkt_id); 5180 rc = -EINVAL; 5181 break; 5182 } 5183 5184 if (rc) 5185 break; 5186 } 5187 5188 /* 5189 * The new CB should have space at the end for two MSG_PROT packets: 5190 * 1. Optional NOP padding for cacheline alignment 5191 * 2. A packet that will act as a completion packet 5192 * 3. 
A packet that will generate MSI interrupt 5193 */ 5194 if (parser->completion) 5195 parser->patched_cb_size += gaudi_get_patched_cb_extra_size( 5196 parser->patched_cb_size); 5197 5198 return rc; 5199 } 5200 5201 static int gaudi_patch_dma_packet(struct hl_device *hdev, 5202 struct hl_cs_parser *parser, 5203 struct packet_lin_dma *user_dma_pkt, 5204 struct packet_lin_dma *new_dma_pkt, 5205 u32 *new_dma_pkt_size) 5206 { 5207 struct hl_userptr *userptr; 5208 struct scatterlist *sg, *sg_next_iter; 5209 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 5210 u64 len, len_next; 5211 dma_addr_t dma_addr, dma_addr_next; 5212 u64 device_memory_addr, addr; 5213 enum dma_data_direction dir; 5214 struct sg_table *sgt; 5215 bool src_in_host = false; 5216 bool skip_host_mem_pin = false; 5217 bool user_memset; 5218 5219 ctl = le32_to_cpu(user_dma_pkt->ctl); 5220 5221 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5222 src_in_host = true; 5223 5224 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5225 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5226 5227 if (src_in_host) { 5228 addr = le64_to_cpu(user_dma_pkt->src_addr); 5229 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 5230 dir = DMA_TO_DEVICE; 5231 if (user_memset) 5232 skip_host_mem_pin = true; 5233 } else { 5234 addr = le64_to_cpu(user_dma_pkt->dst_addr); 5235 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 5236 dir = DMA_FROM_DEVICE; 5237 } 5238 5239 if ((!skip_host_mem_pin) && 5240 (!hl_userptr_is_pinned(hdev, addr, 5241 le32_to_cpu(user_dma_pkt->tsize), 5242 parser->job_userptr_list, &userptr))) { 5243 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 5244 addr, user_dma_pkt->tsize); 5245 return -EFAULT; 5246 } 5247 5248 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 5249 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 5250 *new_dma_pkt_size = sizeof(*user_dma_pkt); 5251 return 0; 5252 } 5253 5254 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5255 5256 sgt = userptr->sgt; 5257 dma_desc_cnt = 0; 5258 5259 for_each_sgtable_dma_sg(sgt, sg, count) { 5260 len = sg_dma_len(sg); 5261 dma_addr = sg_dma_address(sg); 5262 5263 if (len == 0) 5264 break; 5265 5266 while ((count + 1) < sgt->nents) { 5267 sg_next_iter = sg_next(sg); 5268 len_next = sg_dma_len(sg_next_iter); 5269 dma_addr_next = sg_dma_address(sg_next_iter); 5270 5271 if (len_next == 0) 5272 break; 5273 5274 if ((dma_addr + len == dma_addr_next) && 5275 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5276 len += len_next; 5277 count++; 5278 sg = sg_next_iter; 5279 } else { 5280 break; 5281 } 5282 } 5283 5284 ctl = le32_to_cpu(user_dma_pkt->ctl); 5285 if (likely(dma_desc_cnt)) 5286 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 5287 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5288 new_dma_pkt->ctl = cpu_to_le32(ctl); 5289 new_dma_pkt->tsize = cpu_to_le32(len); 5290 5291 if (dir == DMA_TO_DEVICE) { 5292 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 5293 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 5294 } else { 5295 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 5296 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 5297 } 5298 5299 if (!user_memset) 5300 device_memory_addr += len; 5301 dma_desc_cnt++; 5302 new_dma_pkt++; 5303 } 5304 5305 if (!dma_desc_cnt) { 5306 dev_err(hdev->dev, 5307 "Error of 0 SG entries when patching DMA packet\n"); 5308 return -EFAULT; 5309 } 5310 5311 /* Fix the last dma packet - wrcomp must be as user set it */ 5312 new_dma_pkt--; 5313 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 5314 5315 
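	/*
	 * Report how many bytes of LIN_DMA packets were actually emitted so
	 * gaudi_patch_cb() can advance its write offset into the patched CB.
	 * For illustration: host SG entries that are DMA-contiguous and stay
	 * under DMA_MAX_TRANSFER_SIZE are folded by the loop above into a
	 * single descriptor, so dma_desc_cnt can be smaller than the number
	 * of SG entries.
	 */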
*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); 5316 5317 return 0; 5318 } 5319 5320 static int gaudi_patch_cb(struct hl_device *hdev, 5321 struct hl_cs_parser *parser) 5322 { 5323 u32 cb_parsed_length = 0; 5324 u32 cb_patched_cur_length = 0; 5325 int rc = 0; 5326 5327 /* cb_user_size is more than 0 so loop will always be executed */ 5328 while (cb_parsed_length < parser->user_cb_size) { 5329 enum packet_id pkt_id; 5330 u16 pkt_size; 5331 u32 new_pkt_size = 0; 5332 struct gaudi_packet *user_pkt, *kernel_pkt; 5333 5334 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5335 kernel_pkt = parser->patched_cb->kernel_address + 5336 cb_patched_cur_length; 5337 5338 pkt_id = (enum packet_id) ( 5339 (le64_to_cpu(user_pkt->header) & 5340 PACKET_HEADER_PACKET_ID_MASK) >> 5341 PACKET_HEADER_PACKET_ID_SHIFT); 5342 5343 if (!validate_packet_id(pkt_id)) { 5344 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5345 rc = -EINVAL; 5346 break; 5347 } 5348 5349 pkt_size = gaudi_packet_sizes[pkt_id]; 5350 cb_parsed_length += pkt_size; 5351 if (cb_parsed_length > parser->user_cb_size) { 5352 dev_err(hdev->dev, 5353 "packet 0x%x is out of CB boundary\n", pkt_id); 5354 rc = -EINVAL; 5355 break; 5356 } 5357 5358 switch (pkt_id) { 5359 case PACKET_LIN_DMA: 5360 rc = gaudi_patch_dma_packet(hdev, parser, 5361 (struct packet_lin_dma *) user_pkt, 5362 (struct packet_lin_dma *) kernel_pkt, 5363 &new_pkt_size); 5364 cb_patched_cur_length += new_pkt_size; 5365 break; 5366 5367 case PACKET_MSG_PROT: 5368 dev_err(hdev->dev, 5369 "User not allowed to use MSG_PROT\n"); 5370 rc = -EPERM; 5371 break; 5372 5373 case PACKET_CP_DMA: 5374 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5375 rc = -EPERM; 5376 break; 5377 5378 case PACKET_STOP: 5379 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5380 rc = -EPERM; 5381 break; 5382 5383 case PACKET_WREG_32: 5384 case PACKET_WREG_BULK: 5385 case PACKET_MSG_LONG: 5386 case PACKET_MSG_SHORT: 5387 case PACKET_REPEAT: 5388 case PACKET_FENCE: 5389 case PACKET_NOP: 5390 case PACKET_ARB_POINT: 5391 case PACKET_LOAD_AND_EXE: 5392 memcpy(kernel_pkt, user_pkt, pkt_size); 5393 cb_patched_cur_length += pkt_size; 5394 break; 5395 5396 default: 5397 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5398 pkt_id); 5399 rc = -EINVAL; 5400 break; 5401 } 5402 5403 if (rc) 5404 break; 5405 } 5406 5407 return rc; 5408 } 5409 5410 static int gaudi_parse_cb_mmu(struct hl_device *hdev, 5411 struct hl_cs_parser *parser) 5412 { 5413 u64 handle; 5414 u32 patched_cb_size; 5415 struct hl_cb *user_cb; 5416 int rc; 5417 5418 /* 5419 * The new CB should have space at the end for two MSG_PROT packets: 5420 * 1. Optional NOP padding for cacheline alignment 5421 * 2. A packet that will act as a completion packet 5422 * 3. 
A packet that will generate MSI interrupt 5423 */ 5424 if (parser->completion) 5425 parser->patched_cb_size = parser->user_cb_size + 5426 gaudi_get_patched_cb_extra_size(parser->user_cb_size); 5427 else 5428 parser->patched_cb_size = parser->user_cb_size; 5429 5430 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5431 parser->patched_cb_size, false, false, 5432 &handle); 5433 5434 if (rc) { 5435 dev_err(hdev->dev, 5436 "Failed to allocate patched CB for DMA CS %d\n", 5437 rc); 5438 return rc; 5439 } 5440 5441 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5442 /* hl_cb_get should never fail */ 5443 if (!parser->patched_cb) { 5444 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5445 rc = -EFAULT; 5446 goto out; 5447 } 5448 5449 /* 5450 * We are protected from overflow because the check 5451 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk() 5452 * in the common code. That check is done only if is_kernel_allocated_cb is true. 5453 * 5454 * There is no option to reach here without going through that check because: 5455 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to 5456 * an external queue. 5457 * 2. For Gaudi, we only parse CBs that were submitted to the external queues. 5458 */ 5459 memcpy(parser->patched_cb->kernel_address, 5460 parser->user_cb->kernel_address, 5461 parser->user_cb_size); 5462 5463 patched_cb_size = parser->patched_cb_size; 5464 5465 /* Validate patched CB instead of user CB */ 5466 user_cb = parser->user_cb; 5467 parser->user_cb = parser->patched_cb; 5468 rc = gaudi_validate_cb(hdev, parser, true); 5469 parser->user_cb = user_cb; 5470 5471 if (rc) { 5472 hl_cb_put(parser->patched_cb); 5473 goto out; 5474 } 5475 5476 if (patched_cb_size != parser->patched_cb_size) { 5477 dev_err(hdev->dev, "user CB size mismatch\n"); 5478 hl_cb_put(parser->patched_cb); 5479 rc = -EINVAL; 5480 goto out; 5481 } 5482 5483 out: 5484 /* 5485 * Always call cb destroy here because we still have 1 reference 5486 * to it by calling cb_get earlier. After the job will be completed, 5487 * cb_put will release it, but here we want to remove it from the 5488 * idr 5489 */ 5490 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5491 5492 return rc; 5493 } 5494 5495 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 5496 struct hl_cs_parser *parser) 5497 { 5498 u64 handle; 5499 int rc; 5500 5501 rc = gaudi_validate_cb(hdev, parser, false); 5502 5503 if (rc) 5504 goto free_userptr; 5505 5506 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5507 parser->patched_cb_size, false, false, 5508 &handle); 5509 if (rc) { 5510 dev_err(hdev->dev, 5511 "Failed to allocate patched CB for DMA CS %d\n", rc); 5512 goto free_userptr; 5513 } 5514 5515 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5516 /* hl_cb_get should never fail here */ 5517 if (!parser->patched_cb) { 5518 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5519 rc = -EFAULT; 5520 goto out; 5521 } 5522 5523 rc = gaudi_patch_cb(hdev, parser); 5524 5525 if (rc) 5526 hl_cb_put(parser->patched_cb); 5527 5528 out: 5529 /* 5530 * Always call cb destroy here because we still have 1 reference 5531 * to it by calling cb_get earlier. 
After the job will be completed, 5532 * cb_put will release it, but here we want to remove it from the 5533 * idr 5534 */ 5535 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5536 5537 free_userptr: 5538 if (rc) 5539 hl_userptr_delete_list(hdev, parser->job_userptr_list); 5540 return rc; 5541 } 5542 5543 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 5544 struct hl_cs_parser *parser) 5545 { 5546 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5547 struct gaudi_device *gaudi = hdev->asic_specific; 5548 u32 nic_queue_offset, nic_mask_q_id; 5549 5550 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) && 5551 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) { 5552 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0; 5553 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2)); 5554 5555 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) { 5556 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5557 return -EINVAL; 5558 } 5559 } 5560 5561 /* For internal queue jobs just check if CB address is valid */ 5562 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5563 parser->user_cb_size, 5564 asic_prop->sram_user_base_address, 5565 asic_prop->sram_end_address)) 5566 return 0; 5567 5568 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5569 parser->user_cb_size, 5570 asic_prop->dram_user_base_address, 5571 asic_prop->dram_end_address)) 5572 return 0; 5573 5574 /* PMMU and HPMMU addresses are equal, check only one of them */ 5575 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5576 parser->user_cb_size, 5577 asic_prop->pmmu.start_addr, 5578 asic_prop->pmmu.end_addr)) 5579 return 0; 5580 5581 dev_err(hdev->dev, 5582 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 5583 parser->user_cb, parser->user_cb_size); 5584 5585 return -EFAULT; 5586 } 5587 5588 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5589 { 5590 struct gaudi_device *gaudi = hdev->asic_specific; 5591 5592 if (parser->queue_type == QUEUE_TYPE_INT) 5593 return gaudi_parse_cb_no_ext_queue(hdev, parser); 5594 5595 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 5596 return gaudi_parse_cb_mmu(hdev, parser); 5597 else 5598 return gaudi_parse_cb_no_mmu(hdev, parser); 5599 } 5600 5601 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, 5602 u32 len, u32 original_len, u64 cq_addr, u32 cq_val, 5603 u32 msi_vec, bool eb) 5604 { 5605 struct gaudi_device *gaudi = hdev->asic_specific; 5606 struct packet_msg_prot *cq_pkt; 5607 struct packet_nop *cq_padding; 5608 u64 msi_addr; 5609 u32 tmp; 5610 5611 cq_padding = kernel_address + original_len; 5612 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 5613 5614 while ((void *)cq_padding < (void *)cq_pkt) { 5615 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP)); 5616 cq_padding++; 5617 } 5618 5619 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5620 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5621 5622 if (eb) 5623 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5624 5625 cq_pkt->ctl = cpu_to_le32(tmp); 5626 cq_pkt->value = cpu_to_le32(cq_val); 5627 cq_pkt->addr = cpu_to_le64(cq_addr); 5628 5629 cq_pkt++; 5630 5631 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5632 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5633 cq_pkt->ctl = cpu_to_le32(tmp); 5634 cq_pkt->value = cpu_to_le32(1); 5635 5636 if (gaudi->multi_msi_mode) 5637 msi_addr = mmPCIE_MSI_INTR_0 
+ msi_vec * 4; 5638 else 5639 msi_addr = mmPCIE_CORE_MSI_REQ; 5640 5641 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); 5642 } 5643 5644 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 5645 { 5646 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 5647 } 5648 5649 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 5650 u32 size, u64 val) 5651 { 5652 struct packet_lin_dma *lin_dma_pkt; 5653 struct hl_cs_job *job; 5654 u32 cb_size, ctl, err_cause; 5655 struct hl_cb *cb; 5656 int rc; 5657 5658 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 5659 if (!cb) 5660 return -EFAULT; 5661 5662 lin_dma_pkt = cb->kernel_address; 5663 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 5664 cb_size = sizeof(*lin_dma_pkt); 5665 5666 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 5667 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 5668 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 5669 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5670 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5671 5672 lin_dma_pkt->ctl = cpu_to_le32(ctl); 5673 lin_dma_pkt->src_addr = cpu_to_le64(val); 5674 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 5675 lin_dma_pkt->tsize = cpu_to_le32(size); 5676 5677 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5678 if (!job) { 5679 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5680 rc = -ENOMEM; 5681 goto release_cb; 5682 } 5683 5684 /* Verify DMA is OK */ 5685 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5686 if (err_cause && !hdev->init_done) { 5687 dev_dbg(hdev->dev, 5688 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5689 err_cause); 5690 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5691 } 5692 5693 job->id = 0; 5694 job->user_cb = cb; 5695 atomic_inc(&job->user_cb->cs_cnt); 5696 job->user_cb_size = cb_size; 5697 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5698 job->patched_cb = job->user_cb; 5699 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 5700 5701 hl_debugfs_add_job(hdev, job); 5702 5703 rc = gaudi_send_job_on_qman0(hdev, job); 5704 hl_debugfs_remove_job(hdev, job); 5705 kfree(job); 5706 atomic_dec(&cb->cs_cnt); 5707 5708 /* Verify DMA is OK */ 5709 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5710 if (err_cause) { 5711 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5712 rc = -EIO; 5713 if (!hdev->init_done) { 5714 dev_dbg(hdev->dev, 5715 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5716 err_cause); 5717 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5718 } 5719 } 5720 5721 release_cb: 5722 hl_cb_put(cb); 5723 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5724 5725 return rc; 5726 } 5727 5728 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 5729 u32 num_regs, u32 val) 5730 { 5731 struct packet_msg_long *pkt; 5732 struct hl_cs_job *job; 5733 u32 cb_size, ctl; 5734 struct hl_cb *cb; 5735 int i, rc; 5736 5737 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot); 5738 5739 if (cb_size > SZ_2M) { 5740 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M); 5741 return -ENOMEM; 5742 } 5743 5744 cb = hl_cb_kernel_create(hdev, cb_size, false); 5745 if (!cb) 5746 return -EFAULT; 5747 5748 pkt = cb->kernel_address; 5749 5750 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ 5751 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); 5752 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5753 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5754 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5755 5756 for (i = 0; i < num_regs ; 
i++, pkt++) { 5757 pkt->ctl = cpu_to_le32(ctl); 5758 pkt->value = cpu_to_le32(val); 5759 pkt->addr = cpu_to_le64(reg_base + (i * 4)); 5760 } 5761 5762 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5763 if (!job) { 5764 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5765 rc = -ENOMEM; 5766 goto release_cb; 5767 } 5768 5769 job->id = 0; 5770 job->user_cb = cb; 5771 atomic_inc(&job->user_cb->cs_cnt); 5772 job->user_cb_size = cb_size; 5773 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5774 job->patched_cb = job->user_cb; 5775 job->job_cb_size = cb_size; 5776 5777 hl_debugfs_add_job(hdev, job); 5778 5779 rc = gaudi_send_job_on_qman0(hdev, job); 5780 hl_debugfs_remove_job(hdev, job); 5781 kfree(job); 5782 atomic_dec(&cb->cs_cnt); 5783 5784 release_cb: 5785 hl_cb_put(cb); 5786 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5787 5788 return rc; 5789 } 5790 5791 static int gaudi_restore_sm_registers(struct hl_device *hdev) 5792 { 5793 u64 base_addr; 5794 u32 num_regs; 5795 int rc; 5796 5797 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5798 num_regs = NUM_OF_SOB_IN_BLOCK; 5799 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5800 if (rc) { 5801 dev_err(hdev->dev, "failed resetting SM registers"); 5802 return -ENOMEM; 5803 } 5804 5805 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0; 5806 num_regs = NUM_OF_SOB_IN_BLOCK; 5807 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5808 if (rc) { 5809 dev_err(hdev->dev, "failed resetting SM registers"); 5810 return -ENOMEM; 5811 } 5812 5813 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5814 num_regs = NUM_OF_SOB_IN_BLOCK; 5815 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5816 if (rc) { 5817 dev_err(hdev->dev, "failed resetting SM registers"); 5818 return -ENOMEM; 5819 } 5820 5821 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5822 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5823 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5824 if (rc) { 5825 dev_err(hdev->dev, "failed resetting SM registers"); 5826 return -ENOMEM; 5827 } 5828 5829 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0; 5830 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5831 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5832 if (rc) { 5833 dev_err(hdev->dev, "failed resetting SM registers"); 5834 return -ENOMEM; 5835 } 5836 5837 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5838 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5839 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5840 if (rc) { 5841 dev_err(hdev->dev, "failed resetting SM registers"); 5842 return -ENOMEM; 5843 } 5844 5845 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5846 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4); 5847 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT; 5848 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5849 if (rc) { 5850 dev_err(hdev->dev, "failed resetting SM registers"); 5851 return -ENOMEM; 5852 } 5853 5854 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + 5855 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4); 5856 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR; 5857 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5858 if (rc) { 5859 dev_err(hdev->dev, "failed resetting SM registers"); 5860 return -ENOMEM; 5861 } 5862 5863 return 0; 5864 } 5865 5866 static void gaudi_restore_dma_registers(struct hl_device *hdev) 5867 { 5868 u32 
sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 5869 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5870 int i; 5871 5872 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5873 u64 sob_addr = CFG_BASE + 5874 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5875 (i * sob_delta); 5876 u32 dma_offset = i * DMA_CORE_OFFSET; 5877 5878 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, 5879 lower_32_bits(sob_addr)); 5880 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 5881 upper_32_bits(sob_addr)); 5882 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 5883 5884 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 5885 * modified by the user for SRAM reduction 5886 */ 5887 if (i > 1) 5888 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 5889 0x00000001); 5890 } 5891 } 5892 5893 static void gaudi_restore_qm_registers(struct hl_device *hdev) 5894 { 5895 u32 qman_offset; 5896 int i; 5897 5898 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5899 qman_offset = i * DMA_QMAN_OFFSET; 5900 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 5901 } 5902 5903 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 5904 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 5905 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 5906 } 5907 5908 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 5909 qman_offset = i * TPC_QMAN_OFFSET; 5910 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 5911 } 5912 5913 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 5914 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET + 5915 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET; 5916 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0); 5917 } 5918 } 5919 5920 static int gaudi_restore_user_registers(struct hl_device *hdev) 5921 { 5922 int rc; 5923 5924 rc = gaudi_restore_sm_registers(hdev); 5925 if (rc) 5926 return rc; 5927 5928 gaudi_restore_dma_registers(hdev); 5929 gaudi_restore_qm_registers(hdev); 5930 5931 return 0; 5932 } 5933 5934 static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 5935 { 5936 return 0; 5937 } 5938 5939 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 5940 { 5941 u32 size = hdev->asic_prop.mmu_pgt_size + 5942 hdev->asic_prop.mmu_cache_mng_size; 5943 struct gaudi_device *gaudi = hdev->asic_specific; 5944 u64 addr = hdev->asic_prop.mmu_pgt_addr; 5945 5946 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 5947 return 0; 5948 5949 return gaudi_memset_device_memory(hdev, addr, size, 0); 5950 } 5951 5952 static void gaudi_restore_phase_topology(struct hl_device *hdev) 5953 { 5954 5955 } 5956 5957 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, 5958 u32 size_to_dma, dma_addr_t dma_addr) 5959 { 5960 u32 err_cause, val; 5961 u64 dma_offset; 5962 int rc; 5963 5964 dma_offset = dma_id * DMA_CORE_OFFSET; 5965 5966 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); 5967 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); 5968 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); 5969 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); 5970 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); 5971 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 5972 (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); 5973 5974 rc = hl_poll_timeout( 5975 hdev, 5976 mmDMA0_CORE_STS0 + dma_offset, 5977 val, 5978 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), 5979 0, 5980 1000000); 5981 5982 if (rc) { 5983 dev_err(hdev->dev, 5984 "DMA %d timed-out during reading of 0x%llx\n", 5985 dma_id, addr); 5986 return -EIO; 5987 } 5988 5989 /* Verify DMA is OK */ 5990 err_cause 
= RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5991 if (err_cause) { 5992 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5993 dev_dbg(hdev->dev, 5994 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5995 err_cause); 5996 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5997 5998 return -EIO; 5999 } 6000 6001 return 0; 6002 } 6003 6004 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, 6005 void *blob_addr) 6006 { 6007 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; 6008 u32 qm_glbl_sts0, qm_cgm_sts; 6009 u64 dma_offset, qm_offset; 6010 dma_addr_t dma_addr; 6011 void *kernel_addr; 6012 bool is_eng_idle; 6013 int rc = 0, dma_id; 6014 6015 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO); 6016 6017 if (!kernel_addr) 6018 return -ENOMEM; 6019 6020 hdev->asic_funcs->hw_queues_lock(hdev); 6021 6022 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 6023 dma_offset = dma_id * DMA_CORE_OFFSET; 6024 qm_offset = dma_id * DMA_QMAN_OFFSET; 6025 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 6026 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 6027 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 6028 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 6029 IS_DMA_IDLE(dma_core_sts0); 6030 6031 if (!is_eng_idle) { 6032 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 6033 dma_offset = dma_id * DMA_CORE_OFFSET; 6034 qm_offset = dma_id * DMA_QMAN_OFFSET; 6035 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 6036 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 6037 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 6038 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 6039 IS_DMA_IDLE(dma_core_sts0); 6040 6041 if (!is_eng_idle) { 6042 dev_err_ratelimited(hdev->dev, 6043 "Can't read via DMA because it is BUSY\n"); 6044 rc = -EAGAIN; 6045 goto out; 6046 } 6047 } 6048 6049 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); 6050 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, 6051 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 6052 6053 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6054 * using the compute ctx ASID, if exists. If not, use the kernel ctx 6055 * ASID 6056 */ 6057 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6058 6059 /* Verify DMA is OK */ 6060 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6061 if (err_cause) { 6062 dev_dbg(hdev->dev, 6063 "Clearing DMA0 engine from errors (cause 0x%x)\n", 6064 err_cause); 6065 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 6066 } 6067 6068 pos = 0; 6069 size_left = size; 6070 size_to_dma = SZ_2M; 6071 6072 while (size_left > 0) { 6073 6074 if (size_left < SZ_2M) 6075 size_to_dma = size_left; 6076 6077 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, 6078 dma_addr); 6079 if (rc) 6080 break; 6081 6082 memcpy(blob_addr + pos, kernel_addr, size_to_dma); 6083 6084 if (size_left <= SZ_2M) 6085 break; 6086 6087 pos += SZ_2M; 6088 addr += SZ_2M; 6089 size_left -= SZ_2M; 6090 } 6091 6092 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6093 * using the compute ctx ASID, if exists. 
If not, use the kernel ctx 6094 * ASID 6095 */ 6096 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 6097 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6098 6099 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); 6100 6101 out: 6102 hdev->asic_funcs->hw_queues_unlock(hdev); 6103 6104 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); 6105 6106 return rc; 6107 } 6108 6109 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 6110 { 6111 struct gaudi_device *gaudi = hdev->asic_specific; 6112 6113 if (hdev->reset_info.hard_reset_pending) 6114 return U64_MAX; 6115 6116 return readq(hdev->pcie_bar[HBM_BAR_ID] + 6117 (addr - gaudi->hbm_bar_cur_addr)); 6118 } 6119 6120 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 6121 { 6122 struct gaudi_device *gaudi = hdev->asic_specific; 6123 6124 if (hdev->reset_info.hard_reset_pending) 6125 return; 6126 6127 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 6128 (addr - gaudi->hbm_bar_cur_addr)); 6129 } 6130 6131 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 6132 { 6133 /* mask to zero the MMBP and ASID bits */ 6134 WREG32_AND(reg, ~0x7FF); 6135 WREG32_OR(reg, asid); 6136 } 6137 6138 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 6139 { 6140 struct gaudi_device *gaudi = hdev->asic_specific; 6141 6142 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6143 return; 6144 6145 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 6146 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6147 return; 6148 } 6149 6150 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6151 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6152 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6153 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6154 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6155 6156 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6157 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6158 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6159 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6160 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6161 6162 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6163 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6164 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6165 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6166 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6167 6168 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6169 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6170 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6171 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6172 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6173 6174 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6175 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6176 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6177 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6178 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6179 6180 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6181 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, 
asid); 6182 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6183 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6184 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6185 6186 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6187 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6188 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6189 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6190 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6191 6192 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6193 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6194 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6195 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6196 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6197 6198 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 6199 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 6200 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 6201 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 6202 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 6203 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 6204 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 6205 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 6206 6207 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6208 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6209 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6210 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6211 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6212 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 6213 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 6214 6215 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6216 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6217 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6218 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6219 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6220 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 6221 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 6222 6223 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6224 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6225 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6226 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6227 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6228 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 6229 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 6230 6231 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6232 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6233 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6234 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6235 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6236 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 6237 gaudi_mmu_prepare_reg(hdev, 
mmTPC3_CFG_AWUSER_LO, asid); 6238 6239 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6240 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6241 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6242 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6243 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6244 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 6245 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 6246 6247 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6248 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6249 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6250 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6251 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6252 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 6253 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 6254 6255 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6256 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6257 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6258 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6259 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6260 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 6261 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 6262 6263 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6264 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6265 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6266 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6267 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6268 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 6269 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 6270 6271 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6272 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6273 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6274 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6275 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6276 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6277 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6278 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6279 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6280 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6281 6282 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); 6283 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 6284 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 6285 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 6286 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 6287 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 6288 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 6289 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 6290 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 6291 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 6292 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 6293 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 6294 6295 if 
(gaudi->hw_cap_initialized & HW_CAP_NIC0) { 6296 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, 6297 asid); 6298 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, 6299 asid); 6300 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, 6301 asid); 6302 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, 6303 asid); 6304 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, 6305 asid); 6306 } 6307 6308 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) { 6309 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, 6310 asid); 6311 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, 6312 asid); 6313 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, 6314 asid); 6315 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, 6316 asid); 6317 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, 6318 asid); 6319 } 6320 6321 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) { 6322 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, 6323 asid); 6324 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, 6325 asid); 6326 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, 6327 asid); 6328 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, 6329 asid); 6330 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, 6331 asid); 6332 } 6333 6334 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) { 6335 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, 6336 asid); 6337 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, 6338 asid); 6339 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, 6340 asid); 6341 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, 6342 asid); 6343 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, 6344 asid); 6345 } 6346 6347 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) { 6348 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, 6349 asid); 6350 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, 6351 asid); 6352 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, 6353 asid); 6354 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, 6355 asid); 6356 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, 6357 asid); 6358 } 6359 6360 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) { 6361 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, 6362 asid); 6363 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, 6364 asid); 6365 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, 6366 asid); 6367 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, 6368 asid); 6369 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, 6370 asid); 6371 } 6372 6373 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) { 6374 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, 6375 asid); 6376 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, 6377 asid); 6378 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, 6379 asid); 6380 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, 6381 asid); 6382 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, 6383 asid); 6384 } 6385 6386 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) { 6387 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, 6388 asid); 6389 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, 6390 asid); 6391 gaudi_mmu_prepare_reg(hdev, 
mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, 6392 asid); 6393 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, 6394 asid); 6395 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, 6396 asid); 6397 } 6398 6399 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) { 6400 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, 6401 asid); 6402 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, 6403 asid); 6404 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, 6405 asid); 6406 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, 6407 asid); 6408 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, 6409 asid); 6410 } 6411 6412 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) { 6413 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, 6414 asid); 6415 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, 6416 asid); 6417 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, 6418 asid); 6419 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, 6420 asid); 6421 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, 6422 asid); 6423 } 6424 6425 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 6426 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 6427 } 6428 6429 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 6430 struct hl_cs_job *job) 6431 { 6432 struct packet_msg_prot *fence_pkt; 6433 u32 *fence_ptr; 6434 dma_addr_t fence_dma_addr; 6435 struct hl_cb *cb; 6436 u32 tmp, timeout, dma_offset; 6437 int rc; 6438 6439 if (hdev->pldm) 6440 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 6441 else 6442 timeout = HL_DEVICE_TIMEOUT_USEC; 6443 6444 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 6445 if (!fence_ptr) { 6446 dev_err(hdev->dev, 6447 "Failed to allocate fence memory for QMAN0\n"); 6448 return -ENOMEM; 6449 } 6450 6451 cb = job->patched_cb; 6452 6453 fence_pkt = cb->kernel_address + 6454 job->job_cb_size - sizeof(struct packet_msg_prot); 6455 6456 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 6457 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 6458 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 6459 6460 fence_pkt->ctl = cpu_to_le32(tmp); 6461 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 6462 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 6463 6464 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 6465 6466 WREG32(mmDMA0_CORE_PROT + dma_offset, 6467 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6468 6469 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 6470 job->job_cb_size, cb->bus_address); 6471 if (rc) { 6472 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 6473 goto free_fence_ptr; 6474 } 6475 6476 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 6477 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 6478 timeout, true); 6479 6480 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 6481 6482 if (rc == -ETIMEDOUT) { 6483 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 6484 goto free_fence_ptr; 6485 } 6486 6487 free_fence_ptr: 6488 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); 6489 6490 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 6491 return rc; 6492 } 6493 6494 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 6495 { 6496 if (event_type >= GAUDI_EVENT_SIZE) 6497 goto event_not_supported; 6498 6499 if (!gaudi_irq_map_table[event_type].valid) 6500 goto 
event_not_supported; 6501 6502 snprintf(desc, size, gaudi_irq_map_table[event_type].name); 6503 6504 return; 6505 6506 event_not_supported: 6507 snprintf(desc, size, "N/A"); 6508 } 6509 6510 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, 6511 bool is_write, u16 *engine_id_1, 6512 u16 *engine_id_2) 6513 { 6514 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 6515 6516 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 6517 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 6518 6519 switch (x_y) { 6520 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6521 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6522 dma_id[0] = 0; 6523 dma_id[1] = 2; 6524 break; 6525 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6526 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6527 dma_id[0] = 1; 6528 dma_id[1] = 3; 6529 break; 6530 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6531 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6532 dma_id[0] = 4; 6533 dma_id[1] = 6; 6534 break; 6535 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6536 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6537 dma_id[0] = 5; 6538 dma_id[1] = 7; 6539 break; 6540 default: 6541 goto unknown_initiator; 6542 } 6543 6544 for (i = 0 ; i < 2 ; i++) { 6545 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 6546 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6547 } 6548 6549 switch (x_y) { 6550 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6551 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6552 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6553 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6554 return "DMA0"; 6555 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6556 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; 6557 return "DMA2"; 6558 } else { 6559 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6560 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; 6561 return "DMA0 or DMA2"; 6562 } 6563 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6564 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6565 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6566 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6567 return "DMA1"; 6568 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6569 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; 6570 return "DMA3"; 6571 } else { 6572 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6573 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; 6574 return "DMA1 or DMA3"; 6575 } 6576 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6577 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6578 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6579 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6580 return "DMA4"; 6581 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6582 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; 6583 return "DMA6"; 6584 } else { 6585 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6586 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; 6587 return "DMA4 or DMA6"; 6588 } 6589 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6590 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6591 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6592 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6593 return "DMA5"; 6594 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6595 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; 6596 return "DMA7"; 6597 } else { 6598 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6599 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; 6600 return "DMA5 or DMA7"; 6601 } 6602 } 6603 6604 unknown_initiator: 6605 return "unknown initiator"; 6606 } 6607 6608 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, 6609 u16 *engine_id_1, u16 *engine_id_2) 6610 { 6611 u32 val, x_y, axi_id; 6612 6613 val = is_write ? 
RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 6614 RREG32(mmMMU_UP_RAZWI_READ_ID); 6615 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 6616 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 6617 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 6618 RAZWI_INITIATOR_AXI_ID_SHIFT); 6619 6620 switch (x_y) { 6621 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 6622 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6623 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; 6624 return "TPC0"; 6625 } 6626 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6627 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; 6628 return "NIC0"; 6629 } 6630 break; 6631 case RAZWI_INITIATOR_ID_X_Y_TPC1: 6632 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; 6633 return "TPC1"; 6634 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 6635 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 6636 *engine_id_1 = GAUDI_ENGINE_ID_MME_0; 6637 return "MME0"; 6638 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 6639 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 6640 *engine_id_1 = GAUDI_ENGINE_ID_MME_1; 6641 return "MME1"; 6642 case RAZWI_INITIATOR_ID_X_Y_TPC2: 6643 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; 6644 return "TPC2"; 6645 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 6646 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6647 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; 6648 return "TPC3"; 6649 } 6650 /* PCI, CPU or PSOC does not have engine id*/ 6651 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 6652 return "PCI"; 6653 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 6654 return "CPU"; 6655 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 6656 return "PSOC"; 6657 break; 6658 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6659 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6660 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6661 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6662 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6663 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6664 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6665 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6666 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, 6667 engine_id_1, engine_id_2); 6668 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 6669 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6670 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; 6671 return "TPC4"; 6672 } 6673 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6674 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; 6675 return "NIC1"; 6676 } 6677 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6678 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; 6679 return "NIC2"; 6680 } 6681 break; 6682 case RAZWI_INITIATOR_ID_X_Y_TPC5: 6683 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; 6684 return "TPC5"; 6685 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 6686 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 6687 *engine_id_1 = GAUDI_ENGINE_ID_MME_2; 6688 return "MME2"; 6689 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 6690 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 6691 *engine_id_1 = GAUDI_ENGINE_ID_MME_3; 6692 return "MME3"; 6693 case RAZWI_INITIATOR_ID_X_Y_TPC6: 6694 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; 6695 return "TPC6"; 6696 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 6697 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6698 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; 6699 return "TPC7"; 6700 } 6701 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6702 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; 6703 return "NIC4"; 6704 } 6705 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6706 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; 6707 return "NIC5"; 6708 } 6709 break; 6710 default: 6711 break; 6712 } 6713 6714 dev_err(hdev->dev, 6715 
"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 6716 val, 6717 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 6718 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 6719 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 6720 RAZWI_INITIATOR_AXI_ID_MASK); 6721 6722 return "unknown initiator"; 6723 } 6724 6725 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1, 6726 u16 *engine_id_2, bool *is_read, bool *is_write) 6727 { 6728 6729 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 6730 dev_err_ratelimited(hdev->dev, 6731 "RAZWI event caused by illegal write of %s\n", 6732 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); 6733 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 6734 *is_write = true; 6735 } 6736 6737 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 6738 dev_err_ratelimited(hdev->dev, 6739 "RAZWI event caused by illegal read of %s\n", 6740 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); 6741 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 6742 *is_read = true; 6743 } 6744 } 6745 6746 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) 6747 { 6748 struct gaudi_device *gaudi = hdev->asic_specific; 6749 u32 val; 6750 6751 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6752 return; 6753 6754 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 6755 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6756 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 6757 *addr <<= 32; 6758 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 6759 6760 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); 6761 hl_handle_page_fault(hdev, *addr, 0, true, event_mask); 6762 6763 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 6764 } 6765 6766 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 6767 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6768 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 6769 *addr <<= 32; 6770 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 6771 6772 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); 6773 6774 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 6775 } 6776 } 6777 6778 /* 6779 * +-------------------+------------------------------------------------------+ 6780 * | Configuration Reg | Description | 6781 * | Address | | 6782 * +-------------------+------------------------------------------------------+ 6783 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| 6784 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 6785 * | |0xF34 memory wrappers 63:32 | 6786 * | |0xF38 memory wrappers 95:64 | 6787 * | |0xF3C memory wrappers 127:96 | 6788 * +-------------------+------------------------------------------------------+ 6789 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 6790 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 6791 * | |0xF44 memory wrappers 63:32 | 6792 * | |0xF48 memory wrappers 95:64 | 6793 * | |0xF4C memory wrappers 127:96 | 6794 * +-------------------+------------------------------------------------------+ 6795 */ 6796 static int gaudi_extract_ecc_info(struct hl_device *hdev, 6797 struct ecc_info_extract_params *params, u64 *ecc_address, 6798 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 6799 { 6800 u32 i, num_mem_regs, reg, err_bit; 6801 u64 err_addr, err_word = 0; 6802 6803 num_mem_regs = params->num_memories / 32 + 6804 ((params->num_memories % 32) ? 
1 : 0); 6805 6806 if (params->block_address >= CFG_BASE) 6807 params->block_address -= CFG_BASE; 6808 6809 if (params->derr) 6810 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 6811 else 6812 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 6813 6814 /* Set invalid wrapper index */ 6815 *memory_wrapper_idx = 0xFF; 6816 6817 /* Iterate through memory wrappers, a single bit must be set */ 6818 for (i = 0 ; i < num_mem_regs ; i++) { 6819 err_addr += i * 4; 6820 err_word = RREG32(err_addr); 6821 if (err_word) { 6822 err_bit = __ffs(err_word); 6823 *memory_wrapper_idx = err_bit + (32 * i); 6824 break; 6825 } 6826 } 6827 6828 if (*memory_wrapper_idx == 0xFF) { 6829 dev_err(hdev->dev, "ECC error information cannot be found\n"); 6830 return -EINVAL; 6831 } 6832 6833 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 6834 *memory_wrapper_idx); 6835 6836 *ecc_address = 6837 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 6838 *ecc_syndrom = 6839 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 6840 6841 /* Clear error indication */ 6842 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 6843 if (params->derr) 6844 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 6845 else 6846 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 6847 6848 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 6849 6850 return 0; 6851 } 6852 6853 /* 6854 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6855 * 6856 * @idx: the current pi/ci value 6857 * @q_len: the queue length (power of 2) 6858 * 6859 * @return the cyclically decremented index 6860 */ 6861 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) 6862 { 6863 u32 mask = q_len - 1; 6864 6865 /* 6866 * modular decrement is equivalent to adding (queue_size -1) 6867 * later we take LSBs to make sure the value is in the 6868 * range [0, queue_len - 1] 6869 */ 6870 return (idx + q_len - 1) & mask; 6871 } 6872 6873 /** 6874 * gaudi_handle_sw_config_stream_data - print SW config stream data 6875 * 6876 * @hdev: pointer to the habanalabs device structure 6877 * @stream: the QMAN's stream 6878 * @qman_base: base address of QMAN registers block 6879 * @event_mask: mask of the last events occurred 6880 */ 6881 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream, 6882 u64 qman_base, u64 event_mask) 6883 { 6884 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6885 u32 cq_ptr_lo_off, size; 6886 6887 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0; 6888 6889 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 6890 stream * cq_ptr_lo_off; 6891 cq_ptr_hi = cq_ptr_lo + 6892 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0); 6893 cq_tsize = cq_ptr_lo + 6894 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0); 6895 6896 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6897 size = RREG32(cq_tsize); 6898 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", 6899 stream, cq_ptr, size); 6900 6901 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6902 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; 6903 hdev->captured_err_info.undef_opcode.cq_size = size; 6904 hdev->captured_err_info.undef_opcode.stream_id = stream; 6905 } 6906 } 6907 6908 /** 6909 * gaudi_handle_last_pqes_on_err - print last PQEs on error 6910 * 6911 * @hdev: pointer to the habanalabs device structure 6912 * @qid_base: first QID of the QMAN (out of 4 streams) 6913 * @stream: the QMAN's stream 6914 * @qman_base: 
base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
 */
static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
					u32 stream, u64 qman_base,
					u64 event_mask,
					bool pr_sw_conf)
{
	u32 ci, qm_ci_stream_off, queue_len;
	struct hl_hw_queue *q;
	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
	int i;

	q = &hdev->kernel_queues[qid_base + stream];

	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
			stream * qm_ci_stream_off;

	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
			q->int_queue_len : HL_QUEUE_LENGTH;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (pr_sw_conf)
		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	ci = RREG32(pq_ci);

	/* we should start printing from ci - 1 */
	ci = gaudi_queue_idx_dec(ci, queue_len);
	memset(addr, 0, sizeof(addr));

	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
		struct hl_bd *bd;
		u32 len;

		bd = q->kernel_address;
		bd += ci;

		len = le32_to_cpu(bd->len);
		/* len 0 means uninitialized entry - break */
		if (!len)
			break;

		addr[i] = le64_to_cpu(bd->ptr);

		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
							stream, ci, addr[i], len);

		/* get previous ci, wrap if needed */
		ci = gaudi_queue_idx_dec(ci, queue_len);
	}

	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
		u32 arr_idx = undef_opcode->cb_addr_streams_len;

		if (arr_idx == 0) {
			undef_opcode->timestamp = ktime_get();
			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
		}

		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
		undef_opcode->cb_addr_streams_len++;
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);
}

/**
 * handle_qman_data_on_err - extract QMAN data on error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 *
 * This function attempts to extract as much data as possible on QMAN error.
 * On upper CP print the SW config stream data and last 8 PQEs.
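 * (i.e. up to PQ_FETCHER_CACHE_SIZE entries, the depth of the addr[] scratch array used above).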
6996 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 6997 */ 6998 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base, 6999 u32 stream, u64 qman_base, u64 event_mask) 7000 { 7001 u32 i; 7002 7003 if (stream != QMAN_STREAMS) { 7004 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, 7005 qman_base, event_mask, true); 7006 return; 7007 } 7008 7009 /* handle Lower-CP */ 7010 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 7011 7012 for (i = 0; i < QMAN_STREAMS; i++) 7013 gaudi_handle_last_pqes_on_err(hdev, qid_base, i, 7014 qman_base, event_mask, false); 7015 } 7016 7017 static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 7018 const char *qm_name, 7019 u64 qman_base, 7020 u32 qid_base, 7021 u64 *event_mask) 7022 { 7023 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 7024 u64 glbl_sts_addr, arb_err_addr; 7025 char reg_desc[32]; 7026 7027 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE); 7028 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE); 7029 7030 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 7031 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 7032 glbl_sts_clr_val = 0; 7033 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 7034 7035 if (!glbl_sts_val) 7036 continue; 7037 7038 if (i == QMAN_STREAMS) 7039 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 7040 else 7041 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 7042 7043 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 7044 if (glbl_sts_val & BIT(j)) { 7045 dev_err_ratelimited(hdev->dev, 7046 "%s %s. err cause: %s\n", 7047 qm_name, reg_desc, 7048 gaudi_qman_error_cause[j]); 7049 glbl_sts_clr_val |= BIT(j); 7050 } 7051 } 7052 /* check for undefined opcode */ 7053 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && 7054 hdev->captured_err_info.undef_opcode.write_enable) { 7055 memset(&hdev->captured_err_info.undef_opcode, 0, 7056 sizeof(hdev->captured_err_info.undef_opcode)); 7057 7058 hdev->captured_err_info.undef_opcode.write_enable = false; 7059 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; 7060 } 7061 7062 /* Write 1 clear errors */ 7063 if (!hdev->stop_on_err) 7064 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 7065 else 7066 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask); 7067 } 7068 7069 arb_err_val = RREG32(arb_err_addr); 7070 7071 if (!arb_err_val) 7072 return; 7073 7074 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 7075 if (arb_err_val & BIT(j)) { 7076 dev_err_ratelimited(hdev->dev, 7077 "%s ARB_ERR. 
err cause: %s\n", 7078 qm_name, 7079 gaudi_qman_arb_error_cause[j]); 7080 } 7081 } 7082 } 7083 7084 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, 7085 struct hl_eq_sm_sei_data *sei_data) 7086 { 7087 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; 7088 7089 /* Flip the bits as the enum is ordered in the opposite way */ 7090 index = (index ^ 0x3) & 0x3; 7091 7092 switch (sei_data->sei_cause) { 7093 case SM_SEI_SO_OVERFLOW: 7094 dev_err_ratelimited(hdev->dev, 7095 "%s SEI Error: SOB Group %u overflow/underflow", 7096 gaudi_sync_manager_names[index], 7097 le32_to_cpu(sei_data->sei_log)); 7098 break; 7099 case SM_SEI_LBW_4B_UNALIGNED: 7100 dev_err_ratelimited(hdev->dev, 7101 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", 7102 gaudi_sync_manager_names[index], 7103 le32_to_cpu(sei_data->sei_log)); 7104 break; 7105 case SM_SEI_AXI_RESPONSE_ERR: 7106 dev_err_ratelimited(hdev->dev, 7107 "%s SEI Error: AXI ID %u response error", 7108 gaudi_sync_manager_names[index], 7109 le32_to_cpu(sei_data->sei_log)); 7110 break; 7111 default: 7112 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", 7113 le32_to_cpu(sei_data->sei_log)); 7114 break; 7115 } 7116 } 7117 7118 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7119 struct hl_eq_ecc_data *ecc_data) 7120 { 7121 struct ecc_info_extract_params params; 7122 u64 ecc_address = 0, ecc_syndrom = 0; 7123 u8 index, memory_wrapper_idx = 0; 7124 bool extract_info_from_fw; 7125 int rc; 7126 7127 if (hdev->asic_prop.fw_security_enabled) { 7128 extract_info_from_fw = true; 7129 goto extract_ecc_info; 7130 } 7131 7132 switch (event_type) { 7133 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 7134 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: 7135 extract_info_from_fw = true; 7136 break; 7137 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7138 index = event_type - GAUDI_EVENT_TPC0_SERR; 7139 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7140 params.num_memories = 90; 7141 params.derr = false; 7142 extract_info_from_fw = false; 7143 break; 7144 case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: 7145 index = event_type - GAUDI_EVENT_TPC0_DERR; 7146 params.block_address = 7147 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7148 params.num_memories = 90; 7149 params.derr = true; 7150 extract_info_from_fw = false; 7151 break; 7152 case GAUDI_EVENT_MME0_ACC_SERR: 7153 case GAUDI_EVENT_MME1_ACC_SERR: 7154 case GAUDI_EVENT_MME2_ACC_SERR: 7155 case GAUDI_EVENT_MME3_ACC_SERR: 7156 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; 7157 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7158 params.num_memories = 128; 7159 params.derr = false; 7160 extract_info_from_fw = false; 7161 break; 7162 case GAUDI_EVENT_MME0_ACC_DERR: 7163 case GAUDI_EVENT_MME1_ACC_DERR: 7164 case GAUDI_EVENT_MME2_ACC_DERR: 7165 case GAUDI_EVENT_MME3_ACC_DERR: 7166 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; 7167 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7168 params.num_memories = 128; 7169 params.derr = true; 7170 extract_info_from_fw = false; 7171 break; 7172 case GAUDI_EVENT_MME0_SBAB_SERR: 7173 case GAUDI_EVENT_MME1_SBAB_SERR: 7174 case GAUDI_EVENT_MME2_SBAB_SERR: 7175 case GAUDI_EVENT_MME3_SBAB_SERR: 7176 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; 7177 params.block_address = 7178 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7179 params.num_memories = 33; 7180 params.derr = false; 7181 extract_info_from_fw = false; 7182 break; 7183 case GAUDI_EVENT_MME0_SBAB_DERR: 7184 case GAUDI_EVENT_MME1_SBAB_DERR: 7185 case GAUDI_EVENT_MME2_SBAB_DERR: 7186 case GAUDI_EVENT_MME3_SBAB_DERR: 7187 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; 7188 params.block_address = 7189 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7190 params.num_memories = 33; 7191 params.derr = true; 7192 extract_info_from_fw = false; 7193 break; 7194 default: 7195 return; 7196 } 7197 7198 extract_ecc_info: 7199 if (extract_info_from_fw) { 7200 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7201 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7202 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7203 } else { 7204 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address, 7205 &ecc_syndrom, &memory_wrapper_idx); 7206 if (rc) 7207 return; 7208 } 7209 7210 dev_err(hdev->dev, 7211 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n", 7212 ecc_address, ecc_syndrom, memory_wrapper_idx); 7213 } 7214 7215 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7216 { 7217 u64 qman_base; 7218 char desc[32]; 7219 u32 qid_base; 7220 u8 index; 7221 7222 switch (event_type) { 7223 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7224 index = event_type - GAUDI_EVENT_TPC0_QM; 7225 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7226 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; 7227 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); 7228 break; 7229 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7230 if (event_type == GAUDI_EVENT_MME0_QM) { 7231 index = 0; 7232 qid_base = GAUDI_QUEUE_ID_MME_0_0; 7233 } else { /* event_type == GAUDI_EVENT_MME2_QM */ 7234 index = 2; 7235 qid_base = GAUDI_QUEUE_ID_MME_1_0; 7236 } 7237 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET; 7238 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); 7239 break; 7240 case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: 7241 index = event_type - GAUDI_EVENT_DMA0_QM; 7242 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS; 7243 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */ 7244 if (index > 1) 7245 qid_base++; 7246 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET; 7247 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); 7248 break; 7249 case GAUDI_EVENT_NIC0_QM0: 7250 qid_base = GAUDI_QUEUE_ID_NIC_0_0; 7251 qman_base = mmNIC0_QM0_BASE; 7252 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0"); 7253 break; 7254 case GAUDI_EVENT_NIC0_QM1: 7255 qid_base = GAUDI_QUEUE_ID_NIC_1_0; 7256 qman_base = mmNIC0_QM1_BASE; 7257 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1"); 7258 break; 7259 case GAUDI_EVENT_NIC1_QM0: 7260 qid_base = GAUDI_QUEUE_ID_NIC_2_0; 7261 qman_base = mmNIC1_QM0_BASE; 7262 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0"); 7263 break; 7264 case GAUDI_EVENT_NIC1_QM1: 7265 qid_base = GAUDI_QUEUE_ID_NIC_3_0; 7266 qman_base = mmNIC1_QM1_BASE; 7267 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1"); 7268 break; 7269 case GAUDI_EVENT_NIC2_QM0: 7270 qid_base = GAUDI_QUEUE_ID_NIC_4_0; 7271 qman_base = mmNIC2_QM0_BASE; 7272 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0"); 7273 break; 7274 case GAUDI_EVENT_NIC2_QM1: 7275 qid_base = GAUDI_QUEUE_ID_NIC_5_0; 7276 qman_base = mmNIC2_QM1_BASE; 7277 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1"); 7278 break; 7279 case GAUDI_EVENT_NIC3_QM0: 7280 qid_base = GAUDI_QUEUE_ID_NIC_6_0; 7281 qman_base = mmNIC3_QM0_BASE; 7282 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0"); 7283 break; 7284 case GAUDI_EVENT_NIC3_QM1: 7285 qid_base = GAUDI_QUEUE_ID_NIC_7_0; 7286 qman_base = mmNIC3_QM1_BASE; 7287 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1"); 7288 break; 7289 case GAUDI_EVENT_NIC4_QM0: 7290 qid_base = GAUDI_QUEUE_ID_NIC_8_0; 7291 qman_base = mmNIC4_QM0_BASE; 7292 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0"); 7293 break; 7294 case GAUDI_EVENT_NIC4_QM1: 7295 qid_base = GAUDI_QUEUE_ID_NIC_9_0; 7296 qman_base = mmNIC4_QM1_BASE; 7297 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1"); 7298 break; 7299 default: 7300 return; 7301 } 7302 7303 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask); 7304 } 7305 7306 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, 7307 bool check_razwi, u64 *event_mask) 7308 { 7309 bool is_read = false, is_write = false; 7310 u16 engine_id[2], num_of_razwi_eng = 0; 7311 char desc[64] = ""; 7312 u64 razwi_addr = 0; 7313 u8 razwi_flags = 0; 7314 7315 /* 7316 * Init engine id by default as not valid and only if razwi initiated from engine with 7317 * engine id it will get valid value. 
7318 */ 7319 engine_id[0] = HL_RAZWI_NA_ENG_ID; 7320 engine_id[1] = HL_RAZWI_NA_ENG_ID; 7321 7322 gaudi_get_event_desc(event_type, desc, sizeof(desc)); 7323 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7324 event_type, desc); 7325 7326 if (check_razwi) { 7327 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, 7328 &is_write); 7329 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); 7330 7331 if (is_read) 7332 razwi_flags |= HL_RAZWI_READ; 7333 if (is_write) 7334 razwi_flags |= HL_RAZWI_WRITE; 7335 7336 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) { 7337 if (engine_id[1] != HL_RAZWI_NA_ENG_ID) 7338 num_of_razwi_eng = 2; 7339 else 7340 num_of_razwi_eng = 1; 7341 } 7342 7343 if (razwi_flags) 7344 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, 7345 razwi_flags, event_mask); 7346 } 7347 } 7348 7349 static void gaudi_print_out_of_sync_info(struct hl_device *hdev, 7350 struct cpucp_pkt_sync_err *sync_err) 7351 { 7352 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 7353 7354 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 7355 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 7356 } 7357 7358 static void gaudi_print_fw_alive_info(struct hl_device *hdev, 7359 struct hl_eq_fw_alive *fw_alive) 7360 { 7361 dev_err(hdev->dev, 7362 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", 7363 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical", 7364 le32_to_cpu(fw_alive->process_id), 7365 le32_to_cpu(fw_alive->thread_id), 7366 le64_to_cpu(fw_alive->uptime_seconds)); 7367 } 7368 7369 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, 7370 void *data) 7371 { 7372 char desc[64] = "", *type; 7373 struct eq_nic_sei_event *eq_nic_sei = data; 7374 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; 7375 7376 switch (eq_nic_sei->axi_error_cause) { 7377 case RXB: 7378 type = "RXB"; 7379 break; 7380 case RXE: 7381 type = "RXE"; 7382 break; 7383 case TXS: 7384 type = "TXS"; 7385 break; 7386 case TXE: 7387 type = "TXE"; 7388 break; 7389 case QPC_RESP: 7390 type = "QPC_RESP"; 7391 break; 7392 case NON_AXI_ERR: 7393 type = "NON_AXI_ERR"; 7394 break; 7395 case TMR: 7396 type = "TMR"; 7397 break; 7398 default: 7399 dev_err(hdev->dev, "unknown NIC AXI cause %d\n", 7400 eq_nic_sei->axi_error_cause); 7401 type = "N/A"; 7402 break; 7403 } 7404 7405 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, 7406 eq_nic_sei->id); 7407 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7408 event_type, desc); 7409 } 7410 7411 static int gaudi_compute_reset_late_init(struct hl_device *hdev) 7412 { 7413 /* GAUDI doesn't support any reset except hard-reset */ 7414 return -EPERM; 7415 } 7416 7417 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, 7418 struct hl_eq_hbm_ecc_data *hbm_ecc_data) 7419 { 7420 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7421 int rc = 0; 7422 7423 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 7424 CPU_BOOT_DEV_STS0_HBM_ECC_EN) { 7425 if (!hbm_ecc_data) { 7426 dev_err(hdev->dev, "No FW ECC data"); 7427 return 0; 7428 } 7429 7430 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK, 7431 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7432 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK, 7433 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7434 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK, 7435 
le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7436 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK, 7437 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7438 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK, 7439 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7440 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK, 7441 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7442 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK, 7443 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7444 7445 dev_err(hdev->dev, 7446 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7447 device, ch, wr_par, rd_par, ca_par, serr, derr); 7448 dev_err(hdev->dev, 7449 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7450 device, ch, hbm_ecc_data->first_addr, type, 7451 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7452 hbm_ecc_data->dec_cnt); 7453 return 0; 7454 } 7455 7456 if (hdev->asic_prop.fw_security_enabled) { 7457 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7458 return 0; 7459 } 7460 7461 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; 7462 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { 7463 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); 7464 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7465 if (val) { 7466 rc = -EIO; 7467 dev_err(hdev->dev, 7468 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7469 device, ch * 2, val & 0x1, (val >> 1) & 0x1, 7470 (val >> 2) & 0x1, (val >> 3) & 0x1, 7471 (val >> 4) & 0x1); 7472 7473 val2 = RREG32(base + ch * 0x1000 + 0x060); 7474 dev_err(hdev->dev, 7475 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7476 device, ch * 2, 7477 RREG32(base + ch * 0x1000 + 0x064), 7478 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7479 (val2 & 0xFF0000) >> 16, 7480 (val2 & 0xFF000000) >> 24); 7481 } 7482 7483 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); 7484 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7485 if (val) { 7486 rc = -EIO; 7487 dev_err(hdev->dev, 7488 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7489 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, 7490 (val >> 2) & 0x1, (val >> 3) & 0x1, 7491 (val >> 4) & 0x1); 7492 7493 val2 = RREG32(base + ch * 0x1000 + 0x070); 7494 dev_err(hdev->dev, 7495 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7496 device, ch * 2 + 1, 7497 RREG32(base + ch * 0x1000 + 0x074), 7498 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7499 (val2 & 0xFF0000) >> 16, 7500 (val2 & 0xFF000000) >> 24); 7501 } 7502 7503 /* Clear interrupts */ 7504 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); 7505 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); 7506 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); 7507 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); 7508 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); 7509 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); 7510 } 7511 7512 val = RREG32(base + 0x8F30); 7513 val2 = RREG32(base + 0x8F34); 7514 if (val | val2) { 7515 rc = -EIO; 7516 dev_err(hdev->dev, 7517 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", 7518 device, val, val2); 7519 } 7520 val = RREG32(base + 0x8F40); 7521 val2 = RREG32(base + 0x8F44); 7522 if (val | val2) { 7523 rc = -EIO; 7524 dev_err(hdev->dev, 7525 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", 7526 device, val, val2); 7527 } 7528 7529 return rc; 7530 } 
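
/*
 * Illustrative sketch, not used by the driver (the struct and helper names
 * are hypothetical): gaudi_hbm_read_interrupts() above folds the low and
 * high bytes of each per-channel interrupt-info register together and then
 * treats bit 0 as WR_PAR, bit 1 as RD_PAR, bit 2 as CA_PAR, bit 3 as SERR
 * and bit 4 as DERR. The helper below restates that decode in one place,
 * purely for clarity.
 */
struct gaudi_hbm_ch_irq_flags {
	bool wr_par;
	bool rd_par;
	bool ca_par;
	bool serr;
	bool derr;
};

static inline void gaudi_decode_hbm_ch_irq(u32 raw,
					struct gaudi_hbm_ch_irq_flags *f)
{
	/* fold the two bytes of the 16-bit register value, as done above */
	u32 val = (raw & 0xFF) | ((raw >> 8) & 0xFF);

	f->wr_par = val & 0x1;
	f->rd_par = (val >> 1) & 0x1;
	f->ca_par = (val >> 2) & 0x1;
	f->serr = (val >> 3) & 0x1;
	f->derr = (val >> 4) & 0x1;
}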
7531 7532 static int gaudi_hbm_event_to_dev(u16 hbm_event_type) 7533 { 7534 switch (hbm_event_type) { 7535 case GAUDI_EVENT_HBM0_SPI_0: 7536 case GAUDI_EVENT_HBM0_SPI_1: 7537 return 0; 7538 case GAUDI_EVENT_HBM1_SPI_0: 7539 case GAUDI_EVENT_HBM1_SPI_1: 7540 return 1; 7541 case GAUDI_EVENT_HBM2_SPI_0: 7542 case GAUDI_EVENT_HBM2_SPI_1: 7543 return 2; 7544 case GAUDI_EVENT_HBM3_SPI_0: 7545 case GAUDI_EVENT_HBM3_SPI_1: 7546 return 3; 7547 default: 7548 break; 7549 } 7550 7551 /* Should never happen */ 7552 return 0; 7553 } 7554 7555 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, 7556 char *interrupt_name) 7557 { 7558 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; 7559 bool soft_reset_required = false; 7560 7561 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & 7562 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; 7563 7564 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++) 7565 if (tpc_interrupts_cause & BIT(i)) { 7566 dev_err_ratelimited(hdev->dev, 7567 "TPC%d_%s interrupt cause: %s\n", 7568 tpc_id, interrupt_name, 7569 gaudi_tpc_interrupts_cause[i]); 7570 /* If this is QM error, we need to soft-reset */ 7571 if (i == 15) 7572 soft_reset_required = true; 7573 } 7574 7575 /* Clear interrupts */ 7576 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); 7577 7578 return soft_reset_required; 7579 } 7580 7581 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type) 7582 { 7583 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1; 7584 } 7585 7586 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type) 7587 { 7588 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6; 7589 } 7590 7591 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7592 { 7593 ktime_t zero_time = ktime_set(0, 0); 7594 7595 mutex_lock(&hdev->clk_throttling.lock); 7596 7597 switch (event_type) { 7598 case GAUDI_EVENT_FIX_POWER_ENV_S: 7599 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 7600 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 7601 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 7602 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 7603 dev_info_ratelimited(hdev->dev, 7604 "Clock throttling due to power consumption\n"); 7605 break; 7606 7607 case GAUDI_EVENT_FIX_POWER_ENV_E: 7608 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 7609 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 7610 dev_info_ratelimited(hdev->dev, 7611 "Power envelop is safe, back to optimal clock\n"); 7612 break; 7613 7614 case GAUDI_EVENT_FIX_THERMAL_ENV_S: 7615 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 7616 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 7617 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 7618 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 7619 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7620 dev_info_ratelimited(hdev->dev, 7621 "Clock throttling due to overheating\n"); 7622 break; 7623 7624 case GAUDI_EVENT_FIX_THERMAL_ENV_E: 7625 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 7626 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 7627 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7628 dev_info_ratelimited(hdev->dev, 7629 "Thermal envelop is safe, back to optimal clock\n"); 7630 break; 7631 7632 default: 7633 
dev_err(hdev->dev, "Received invalid clock change event %d\n", 7634 event_type); 7635 break; 7636 } 7637 7638 mutex_unlock(&hdev->clk_throttling.lock); 7639 } 7640 7641 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 7642 { 7643 struct gaudi_device *gaudi = hdev->asic_specific; 7644 struct hl_info_fw_err_info fw_err_info; 7645 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0; 7646 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); 7647 u32 fw_fatal_err_flag = 0, flags = 0; 7648 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) 7649 >> EQ_CTL_EVENT_TYPE_SHIFT); 7650 bool reset_required, reset_direct = false; 7651 u8 cause; 7652 int rc; 7653 7654 if (event_type >= GAUDI_EVENT_SIZE) { 7655 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 7656 event_type, GAUDI_EVENT_SIZE - 1); 7657 return; 7658 } 7659 7660 gaudi->events_stat[event_type]++; 7661 gaudi->events_stat_aggregate[event_type]++; 7662 7663 switch (event_type) { 7664 case GAUDI_EVENT_PCIE_CORE_DERR: 7665 case GAUDI_EVENT_PCIE_IF_DERR: 7666 case GAUDI_EVENT_PCIE_PHY_DERR: 7667 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: 7668 case GAUDI_EVENT_MME0_ACC_DERR: 7669 case GAUDI_EVENT_MME0_SBAB_DERR: 7670 case GAUDI_EVENT_MME1_ACC_DERR: 7671 case GAUDI_EVENT_MME1_SBAB_DERR: 7672 case GAUDI_EVENT_MME2_ACC_DERR: 7673 case GAUDI_EVENT_MME2_SBAB_DERR: 7674 case GAUDI_EVENT_MME3_ACC_DERR: 7675 case GAUDI_EVENT_MME3_SBAB_DERR: 7676 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: 7677 fallthrough; 7678 case GAUDI_EVENT_CPU_IF_ECC_DERR: 7679 case GAUDI_EVENT_PSOC_MEM_DERR: 7680 case GAUDI_EVENT_PSOC_CORESIGHT_DERR: 7681 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: 7682 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: 7683 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: 7684 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: 7685 case GAUDI_EVENT_MMU_DERR: 7686 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7687 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7688 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7689 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7690 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7691 goto reset_device; 7692 7693 case GAUDI_EVENT_GIC500: 7694 case GAUDI_EVENT_AXI_ECC: 7695 case GAUDI_EVENT_L2_RAM_ECC: 7696 case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: 7697 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7698 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7699 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7700 goto reset_device; 7701 7702 case GAUDI_EVENT_HBM0_SPI_0: 7703 case GAUDI_EVENT_HBM1_SPI_0: 7704 case GAUDI_EVENT_HBM2_SPI_0: 7705 case GAUDI_EVENT_HBM3_SPI_0: 7706 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7707 gaudi_hbm_read_interrupts(hdev, 7708 gaudi_hbm_event_to_dev(event_type), 7709 &eq_entry->hbm_ecc_data); 7710 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7711 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7712 goto reset_device; 7713 7714 case GAUDI_EVENT_HBM0_SPI_1: 7715 case GAUDI_EVENT_HBM1_SPI_1: 7716 case GAUDI_EVENT_HBM2_SPI_1: 7717 case GAUDI_EVENT_HBM3_SPI_1: 7718 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7719 gaudi_hbm_read_interrupts(hdev, 7720 gaudi_hbm_event_to_dev(event_type), 7721 &eq_entry->hbm_ecc_data); 7722 hl_fw_unmask_irq(hdev, event_type); 7723 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7724 break; 7725 7726 case GAUDI_EVENT_TPC0_DEC: 7727 case GAUDI_EVENT_TPC1_DEC: 7728 case GAUDI_EVENT_TPC2_DEC: 7729 case GAUDI_EVENT_TPC3_DEC: 7730 case GAUDI_EVENT_TPC4_DEC: 7731 case GAUDI_EVENT_TPC5_DEC: 7732 case GAUDI_EVENT_TPC6_DEC: 7733 case GAUDI_EVENT_TPC7_DEC: 7734 /* In TPC DEC event, notify on TPC assertion. While there isn't 7735 * a specific event for assertion yet, the FW generates TPC DEC event. 7736 * The SW upper layer will inspect an internal mapped area to indicate 7737 * if the event is a TPC Assertion or a "real" TPC DEC. 7738 */ 7739 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7740 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7741 reset_required = gaudi_tpc_read_interrupts(hdev, 7742 tpc_dec_event_to_tpc_id(event_type), 7743 "AXI_SLV_DEC_Error"); 7744 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7745 if (reset_required) { 7746 dev_err(hdev->dev, "reset required due to %s\n", 7747 gaudi_irq_map_table[event_type].name); 7748 7749 reset_direct = true; 7750 goto reset_device; 7751 } else { 7752 hl_fw_unmask_irq(hdev, event_type); 7753 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7754 } 7755 break; 7756 7757 case GAUDI_EVENT_TPC0_KRN_ERR: 7758 case GAUDI_EVENT_TPC1_KRN_ERR: 7759 case GAUDI_EVENT_TPC2_KRN_ERR: 7760 case GAUDI_EVENT_TPC3_KRN_ERR: 7761 case GAUDI_EVENT_TPC4_KRN_ERR: 7762 case GAUDI_EVENT_TPC5_KRN_ERR: 7763 case GAUDI_EVENT_TPC6_KRN_ERR: 7764 case GAUDI_EVENT_TPC7_KRN_ERR: 7765 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7766 reset_required = gaudi_tpc_read_interrupts(hdev, 7767 tpc_krn_event_to_tpc_id(event_type), 7768 "KRN_ERR"); 7769 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7770 if (reset_required) { 7771 dev_err(hdev->dev, "reset required due to %s\n", 7772 gaudi_irq_map_table[event_type].name); 7773 7774 reset_direct = true; 7775 goto reset_device; 7776 } else { 7777 hl_fw_unmask_irq(hdev, event_type); 7778 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7779 } 7780 break; 7781 7782 case GAUDI_EVENT_PCIE_CORE_SERR: 7783 case GAUDI_EVENT_PCIE_IF_SERR: 7784 case GAUDI_EVENT_PCIE_PHY_SERR: 7785 case GAUDI_EVENT_TPC0_SERR ... 
GAUDI_EVENT_TPC7_SERR: 7786 case GAUDI_EVENT_MME0_ACC_SERR: 7787 case GAUDI_EVENT_MME0_SBAB_SERR: 7788 case GAUDI_EVENT_MME1_ACC_SERR: 7789 case GAUDI_EVENT_MME1_SBAB_SERR: 7790 case GAUDI_EVENT_MME2_ACC_SERR: 7791 case GAUDI_EVENT_MME2_SBAB_SERR: 7792 case GAUDI_EVENT_MME3_ACC_SERR: 7793 case GAUDI_EVENT_MME3_SBAB_SERR: 7794 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: 7795 case GAUDI_EVENT_CPU_IF_ECC_SERR: 7796 case GAUDI_EVENT_PSOC_MEM_SERR: 7797 case GAUDI_EVENT_PSOC_CORESIGHT_SERR: 7798 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: 7799 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: 7800 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: 7801 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: 7802 fallthrough; 7803 case GAUDI_EVENT_MMU_SERR: 7804 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7805 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7806 hl_fw_unmask_irq(hdev, event_type); 7807 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7808 break; 7809 7810 case GAUDI_EVENT_PCIE_DEC: 7811 case GAUDI_EVENT_CPU_AXI_SPLITTER: 7812 case GAUDI_EVENT_PSOC_AXI_DEC: 7813 case GAUDI_EVENT_PSOC_PRSTN_FALL: 7814 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7815 hl_fw_unmask_irq(hdev, event_type); 7816 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7817 break; 7818 7819 case GAUDI_EVENT_MMU_PAGE_FAULT: 7820 case GAUDI_EVENT_MMU_WR_PERM: 7821 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7822 hl_fw_unmask_irq(hdev, event_type); 7823 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7824 break; 7825 7826 case GAUDI_EVENT_MME0_WBC_RSP: 7827 case GAUDI_EVENT_MME0_SBAB0_RSP: 7828 case GAUDI_EVENT_MME1_WBC_RSP: 7829 case GAUDI_EVENT_MME1_SBAB0_RSP: 7830 case GAUDI_EVENT_MME2_WBC_RSP: 7831 case GAUDI_EVENT_MME2_SBAB0_RSP: 7832 case GAUDI_EVENT_MME3_WBC_RSP: 7833 case GAUDI_EVENT_MME3_SBAB0_RSP: 7834 case GAUDI_EVENT_RAZWI_OR_ADC: 7835 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7836 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: 7837 fallthrough; 7838 case GAUDI_EVENT_NIC0_QM0: 7839 case GAUDI_EVENT_NIC0_QM1: 7840 case GAUDI_EVENT_NIC1_QM0: 7841 case GAUDI_EVENT_NIC1_QM1: 7842 case GAUDI_EVENT_NIC2_QM0: 7843 case GAUDI_EVENT_NIC2_QM1: 7844 case GAUDI_EVENT_NIC3_QM0: 7845 case GAUDI_EVENT_NIC3_QM1: 7846 case GAUDI_EVENT_NIC4_QM0: 7847 case GAUDI_EVENT_NIC4_QM1: 7848 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: 7849 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7850 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7851 gaudi_handle_qman_err(hdev, event_type, &event_mask); 7852 hl_fw_unmask_irq(hdev, event_type); 7853 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); 7854 break; 7855 7856 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7857 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7858 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7859 goto reset_device; 7860 7861 case GAUDI_EVENT_TPC0_BMON_SPMU: 7862 case GAUDI_EVENT_TPC1_BMON_SPMU: 7863 case GAUDI_EVENT_TPC2_BMON_SPMU: 7864 case GAUDI_EVENT_TPC3_BMON_SPMU: 7865 case GAUDI_EVENT_TPC4_BMON_SPMU: 7866 case GAUDI_EVENT_TPC5_BMON_SPMU: 7867 case GAUDI_EVENT_TPC6_BMON_SPMU: 7868 case GAUDI_EVENT_TPC7_BMON_SPMU: 7869 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7870 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7871 hl_fw_unmask_irq(hdev, event_type); 7872 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7873 break; 7874 7875 case GAUDI_EVENT_NIC_SEI_0 ... 
GAUDI_EVENT_NIC_SEI_4: 7876 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7877 hl_fw_unmask_irq(hdev, event_type); 7878 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7879 break; 7880 7881 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: 7882 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7883 gaudi_print_sm_sei_info(hdev, event_type, 7884 &eq_entry->sm_sei_data); 7885 rc = hl_state_dump(hdev); 7886 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7887 if (rc) 7888 dev_err(hdev->dev, 7889 "Error during system state dump %d\n", rc); 7890 hl_fw_unmask_irq(hdev, event_type); 7891 break; 7892 7893 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: 7894 break; 7895 7896 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: 7897 gaudi_print_clk_change_info(hdev, event_type, &event_mask); 7898 hl_fw_unmask_irq(hdev, event_type); 7899 break; 7900 7901 case GAUDI_EVENT_PSOC_GPIO_U16_0: 7902 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; 7903 dev_err(hdev->dev, 7904 "Received high temp H/W interrupt %d (cause %d)\n", 7905 event_type, cause); 7906 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7907 break; 7908 7909 case GAUDI_EVENT_DEV_RESET_REQ: 7910 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7911 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7912 goto reset_device; 7913 7914 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7915 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7916 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7917 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7918 goto reset_device; 7919 7920 case GAUDI_EVENT_FW_ALIVE_S: 7921 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7922 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7923 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR; 7924 fw_err_info.event_id = event_type; 7925 fw_err_info.event_mask = &event_mask; 7926 hl_handle_fw_err(hdev, &fw_err_info); 7927 goto reset_device; 7928 7929 default: 7930 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", 7931 event_type); 7932 break; 7933 } 7934 7935 if (event_mask) 7936 hl_notifier_event_send_all(hdev, event_mask); 7937 7938 return; 7939 7940 reset_device: 7941 reset_required = true; 7942 7943 if (hdev->asic_prop.fw_security_enabled && !reset_direct) { 7944 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag; 7945 7946 /* notify on device unavailable while the reset triggered by fw */ 7947 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | 7948 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); 7949 } else if (hdev->hard_reset_on_fw_events) { 7950 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag; 7951 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7952 } else { 7953 reset_required = false; 7954 } 7955 7956 if (reset_required) { 7957 /* escalate general hw errors to critical/fatal error */ 7958 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) 7959 hl_handle_critical_hw_err(hdev, event_type, &event_mask); 7960 7961 hl_device_cond_reset(hdev, flags, event_mask); 7962 } else { 7963 hl_fw_unmask_irq(hdev, event_type); 7964 /* Notification on occurred event needs to be sent although reset is not executed */ 7965 if (event_mask) 7966 hl_notifier_event_send_all(hdev, event_mask); 7967 } 7968 } 7969 7970 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7971 { 7972 struct gaudi_device *gaudi = hdev->asic_specific; 7973 7974 if (aggregate) { 7975 *size = (u32) sizeof(gaudi->events_stat_aggregate); 7976 
return gaudi->events_stat_aggregate; 7977 } 7978 7979 *size = (u32) sizeof(gaudi->events_stat); 7980 return gaudi->events_stat; 7981 } 7982 7983 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 7984 { 7985 struct gaudi_device *gaudi = hdev->asic_specific; 7986 u32 status, timeout_usec; 7987 int rc; 7988 7989 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || 7990 hdev->reset_info.hard_reset_pending) 7991 return 0; 7992 7993 if (hdev->pldm) 7994 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7995 else 7996 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7997 7998 /* L0 & L1 invalidation */ 7999 WREG32(mmSTLB_INV_PS, 3); 8000 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); 8001 WREG32(mmSTLB_INV_PS, 2); 8002 8003 rc = hl_poll_timeout( 8004 hdev, 8005 mmSTLB_INV_PS, 8006 status, 8007 !status, 8008 1000, 8009 timeout_usec); 8010 8011 WREG32(mmSTLB_INV_SET, 0); 8012 8013 return rc; 8014 } 8015 8016 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, 8017 bool is_hard, u32 flags, 8018 u32 asid, u64 va, u64 size) 8019 { 8020 /* Treat as invalidate all because there is no range invalidation 8021 * in Gaudi 8022 */ 8023 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 8024 } 8025 8026 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr) 8027 { 8028 u32 status, timeout_usec; 8029 int rc; 8030 8031 if (hdev->pldm) 8032 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 8033 else 8034 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 8035 8036 WREG32(MMU_ASID, asid); 8037 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 8038 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); 8039 WREG32(MMU_BUSY, 0x80000000); 8040 8041 rc = hl_poll_timeout( 8042 hdev, 8043 MMU_BUSY, 8044 status, 8045 !(status & 0x80000000), 8046 1000, 8047 timeout_usec); 8048 8049 if (rc) { 8050 dev_err(hdev->dev, 8051 "Timeout during MMU hop0 config of asid %d\n", asid); 8052 return rc; 8053 } 8054 8055 return 0; 8056 } 8057 8058 static int gaudi_send_heartbeat(struct hl_device *hdev) 8059 { 8060 struct gaudi_device *gaudi = hdev->asic_specific; 8061 8062 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8063 return 0; 8064 8065 return hl_fw_send_heartbeat(hdev); 8066 } 8067 8068 static int gaudi_cpucp_info_get(struct hl_device *hdev) 8069 { 8070 struct gaudi_device *gaudi = hdev->asic_specific; 8071 struct asic_fixed_properties *prop = &hdev->asic_prop; 8072 int rc; 8073 8074 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8075 return 0; 8076 8077 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, 8078 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 8079 mmCPU_BOOT_ERR1); 8080 if (rc) 8081 return rc; 8082 8083 if (!strlen(prop->cpucp_info.card_name)) 8084 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 8085 CARD_NAME_MAX_LEN); 8086 8087 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); 8088 8089 set_default_power_values(hdev); 8090 8091 return 0; 8092 } 8093 8094 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 8095 struct engines_data *e) 8096 { 8097 struct gaudi_device *gaudi = hdev->asic_specific; 8098 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; 8099 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; 8100 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n"; 8101 unsigned long *mask = (unsigned long *)mask_arr; 8102 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; 8103 bool is_idle = true, is_eng_idle, is_slave; 8104 u64 offset; 8105 int 
i, dma_id, port; 8106 8107 if (e) 8108 hl_engine_data_sprintf(e, 8109 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8110 "--- ------- ------------ ---------- -------------\n"); 8111 8112 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { 8113 dma_id = gaudi_dma_assignment[i]; 8114 offset = dma_id * DMA_QMAN_OFFSET; 8115 8116 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); 8117 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); 8118 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); 8119 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8120 IS_DMA_IDLE(dma_core_sts0); 8121 is_idle &= is_eng_idle; 8122 8123 if (mask && !is_eng_idle) 8124 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8125 if (e) 8126 hl_engine_data_sprintf(e, fmt, dma_id, 8127 is_eng_idle ? "Y" : "N", qm_glbl_sts0, 8128 qm_cgm_sts, dma_core_sts0); 8129 } 8130 8131 if (e) 8132 hl_engine_data_sprintf(e, 8133 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8134 "--- ------- ------------ ---------- ----------\n"); 8135 8136 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 8137 offset = i * TPC_QMAN_OFFSET; 8138 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); 8139 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); 8140 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); 8141 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8142 IS_TPC_IDLE(tpc_cfg_sts); 8143 is_idle &= is_eng_idle; 8144 8145 if (mask && !is_eng_idle) 8146 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8147 if (e) 8148 hl_engine_data_sprintf(e, fmt, i, 8149 is_eng_idle ? "Y" : "N", 8150 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8151 } 8152 8153 if (e) 8154 hl_engine_data_sprintf(e, 8155 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8156 "--- ------- ------------ ---------- -----------\n"); 8157 8158 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { 8159 offset = i * MME_QMAN_OFFSET; 8160 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); 8161 is_eng_idle = IS_MME_IDLE(mme_arch_sts); 8162 8163 /* MME 1 & 3 are slaves, no need to check their QMANs */ 8164 is_slave = i % 2; 8165 if (!is_slave) { 8166 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); 8167 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); 8168 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8169 } 8170 8171 is_idle &= is_eng_idle; 8172 8173 if (mask && !is_eng_idle) 8174 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8175 if (e) { 8176 if (!is_slave) 8177 hl_engine_data_sprintf(e, fmt, i, 8178 is_eng_idle ? "Y" : "N", 8179 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8180 else 8181 hl_engine_data_sprintf(e, mme_slave_fmt, i, 8182 is_eng_idle ? "Y" : "N", "-", 8183 "-", mme_arch_sts); 8184 } 8185 } 8186 8187 if (e) 8188 hl_engine_data_sprintf(e, 8189 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8190 "--- ------- ------------ ----------\n"); 8191 8192 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { 8193 offset = i * NIC_MACRO_QMAN_OFFSET; 8194 port = 2 * i; 8195 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8196 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 8197 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 8198 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8199 is_idle &= is_eng_idle; 8200 8201 if (mask && !is_eng_idle) 8202 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8203 if (e) 8204 hl_engine_data_sprintf(e, nic_fmt, port, 8205 is_eng_idle ? 
"Y" : "N", 8206 qm_glbl_sts0, qm_cgm_sts); 8207 } 8208 8209 port = 2 * i + 1; 8210 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8211 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset); 8212 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset); 8213 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8214 is_idle &= is_eng_idle; 8215 8216 if (mask && !is_eng_idle) 8217 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8218 if (e) 8219 hl_engine_data_sprintf(e, nic_fmt, port, 8220 is_eng_idle ? "Y" : "N", 8221 qm_glbl_sts0, qm_cgm_sts); 8222 } 8223 } 8224 8225 if (e) 8226 hl_engine_data_sprintf(e, "\n"); 8227 8228 return is_idle; 8229 } 8230 8231 static void gaudi_hw_queues_lock(struct hl_device *hdev) 8232 __acquires(&gaudi->hw_queues_lock) 8233 { 8234 struct gaudi_device *gaudi = hdev->asic_specific; 8235 8236 spin_lock(&gaudi->hw_queues_lock); 8237 } 8238 8239 static void gaudi_hw_queues_unlock(struct hl_device *hdev) 8240 __releases(&gaudi->hw_queues_lock) 8241 { 8242 struct gaudi_device *gaudi = hdev->asic_specific; 8243 8244 spin_unlock(&gaudi->hw_queues_lock); 8245 } 8246 8247 static u32 gaudi_get_pci_id(struct hl_device *hdev) 8248 { 8249 return hdev->pdev->device; 8250 } 8251 8252 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, 8253 size_t max_size) 8254 { 8255 struct gaudi_device *gaudi = hdev->asic_specific; 8256 8257 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8258 return 0; 8259 8260 return hl_fw_get_eeprom_data(hdev, data, max_size); 8261 } 8262 8263 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) 8264 { 8265 struct gaudi_device *gaudi = hdev->asic_specific; 8266 8267 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8268 return 0; 8269 8270 return hl_fw_get_monitor_dump(hdev, data); 8271 } 8272 8273 /* 8274 * this function should be used only during initialization and/or after reset, 8275 * when there are no active users. 
8276 */ 8277 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) 8278 { 8279 u64 kernel_timeout; 8280 u32 status, offset; 8281 int rc; 8282 8283 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); 8284 8285 if (hdev->pldm) 8286 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; 8287 else 8288 kernel_timeout = HL_DEVICE_TIMEOUT_USEC; 8289 8290 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, 8291 lower_32_bits(tpc_kernel)); 8292 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, 8293 upper_32_bits(tpc_kernel)); 8294 8295 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, 8296 lower_32_bits(tpc_kernel)); 8297 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, 8298 upper_32_bits(tpc_kernel)); 8299 /* set a valid LUT pointer, content is of no significance */ 8300 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, 8301 lower_32_bits(tpc_kernel)); 8302 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, 8303 upper_32_bits(tpc_kernel)); 8304 8305 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, 8306 lower_32_bits(CFG_BASE + 8307 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); 8308 8309 WREG32(mmTPC0_CFG_TPC_CMD + offset, 8310 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | 8311 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); 8312 /* wait a bit for the engine to start executing */ 8313 usleep_range(1000, 1500); 8314 8315 /* wait until engine has finished executing */ 8316 rc = hl_poll_timeout( 8317 hdev, 8318 mmTPC0_CFG_STATUS + offset, 8319 status, 8320 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8321 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8322 1000, 8323 kernel_timeout); 8324 8325 if (rc) { 8326 dev_err(hdev->dev, 8327 "Timeout while waiting for TPC%d icache prefetch\n", 8328 tpc_id); 8329 return -EIO; 8330 } 8331 8332 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, 8333 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); 8334 8335 /* wait a bit for the engine to start executing */ 8336 usleep_range(1000, 1500); 8337 8338 /* wait until engine has finished executing */ 8339 rc = hl_poll_timeout( 8340 hdev, 8341 mmTPC0_CFG_STATUS + offset, 8342 status, 8343 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8344 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8345 1000, 8346 kernel_timeout); 8347 8348 if (rc) { 8349 dev_err(hdev->dev, 8350 "Timeout while waiting for TPC%d vector pipe\n", 8351 tpc_id); 8352 return -EIO; 8353 } 8354 8355 rc = hl_poll_timeout( 8356 hdev, 8357 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, 8358 status, 8359 (status == 0), 8360 1000, 8361 kernel_timeout); 8362 8363 if (rc) { 8364 dev_err(hdev->dev, 8365 "Timeout while waiting for TPC%d kernel to execute\n", 8366 tpc_id); 8367 return -EIO; 8368 } 8369 8370 return 0; 8371 } 8372 8373 static int gaudi_internal_cb_pool_init(struct hl_device *hdev, 8374 struct hl_ctx *ctx) 8375 { 8376 struct gaudi_device *gaudi = hdev->asic_specific; 8377 int min_alloc_order, rc, collective_cb_size; 8378 8379 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8380 return 0; 8381 8382 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 8383 HOST_SPACE_INTERNAL_CB_SZ, 8384 &hdev->internal_cb_pool_dma_addr, 8385 GFP_KERNEL | __GFP_ZERO); 8386 8387 if (!hdev->internal_cb_pool_virt_addr) 8388 return -ENOMEM; 8389 8390 collective_cb_size = sizeof(struct packet_msg_short) * 5 + 8391 sizeof(struct packet_fence); 8392 min_alloc_order = ilog2(collective_cb_size); 8393 8394 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 8395 if (!hdev->internal_cb_pool) { 8396 dev_err(hdev->dev, 8397 "Failed to 
create internal CB pool\n"); 8398 rc = -ENOMEM; 8399 goto free_internal_cb_pool; 8400 } 8401 8402 rc = gen_pool_add(hdev->internal_cb_pool, 8403 (uintptr_t) hdev->internal_cb_pool_virt_addr, 8404 HOST_SPACE_INTERNAL_CB_SZ, -1); 8405 if (rc) { 8406 dev_err(hdev->dev, 8407 "Failed to add memory to internal CB pool\n"); 8408 rc = -EFAULT; 8409 goto destroy_internal_cb_pool; 8410 } 8411 8412 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, 8413 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ, 8414 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 8415 8416 if (!hdev->internal_cb_va_base) { 8417 rc = -ENOMEM; 8418 goto destroy_internal_cb_pool; 8419 } 8420 8421 mutex_lock(&hdev->mmu_lock); 8422 8423 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8424 hdev->internal_cb_pool_dma_addr, 8425 HOST_SPACE_INTERNAL_CB_SZ); 8426 if (rc) 8427 goto unreserve_internal_cb_pool; 8428 8429 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8430 if (rc) 8431 goto unmap_internal_cb_pool; 8432 8433 mutex_unlock(&hdev->mmu_lock); 8434 8435 return 0; 8436 8437 unmap_internal_cb_pool: 8438 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8439 HOST_SPACE_INTERNAL_CB_SZ); 8440 unreserve_internal_cb_pool: 8441 mutex_unlock(&hdev->mmu_lock); 8442 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8443 HOST_SPACE_INTERNAL_CB_SZ); 8444 destroy_internal_cb_pool: 8445 gen_pool_destroy(hdev->internal_cb_pool); 8446 free_internal_cb_pool: 8447 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8448 hdev->internal_cb_pool_dma_addr); 8449 8450 return rc; 8451 } 8452 8453 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, 8454 struct hl_ctx *ctx) 8455 { 8456 struct gaudi_device *gaudi = hdev->asic_specific; 8457 8458 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8459 return; 8460 8461 mutex_lock(&hdev->mmu_lock); 8462 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8463 HOST_SPACE_INTERNAL_CB_SZ); 8464 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8465 HOST_SPACE_INTERNAL_CB_SZ); 8466 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8467 mutex_unlock(&hdev->mmu_lock); 8468 8469 gen_pool_destroy(hdev->internal_cb_pool); 8470 8471 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8472 hdev->internal_cb_pool_dma_addr); 8473 } 8474 8475 static int gaudi_ctx_init(struct hl_ctx *ctx) 8476 { 8477 int rc; 8478 8479 if (ctx->asid == HL_KERNEL_ASID_ID) 8480 return 0; 8481 8482 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); 8483 if (rc) 8484 return rc; 8485 8486 rc = gaudi_restore_user_registers(ctx->hdev); 8487 if (rc) 8488 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8489 8490 return rc; 8491 } 8492 8493 static void gaudi_ctx_fini(struct hl_ctx *ctx) 8494 { 8495 if (ctx->asid == HL_KERNEL_ASID_ID) 8496 return; 8497 8498 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8499 } 8500 8501 static int gaudi_pre_schedule_cs(struct hl_cs *cs) 8502 { 8503 return 0; 8504 } 8505 8506 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 8507 { 8508 return gaudi_cq_assignment[cq_idx]; 8509 } 8510 8511 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) 8512 { 8513 return sizeof(struct packet_msg_short) + 8514 sizeof(struct packet_msg_prot) * 2; 8515 } 8516 8517 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) 8518 { 8519 return sizeof(struct packet_msg_short) * 4 + 8520 sizeof(struct packet_fence) + 8521 sizeof(struct packet_msg_prot) * 2; 8522 } 8523 8524 
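
/*
 * Illustrative sketch, not used by the driver (the helper name below is
 * hypothetical): the wait CB generated by gaudi_gen_wait_cb() below points a
 * sync manager monitor at the target queue's CP_FENCE2_RDATA register using
 * three MSG_SHORT packets (payload address low/high and a payload data of 1),
 * arms the monitor on the requested SOB group/value with a fourth MSG_SHORT,
 * and then issues a FENCE packet (ID 2, target value 1) that blocks the CP
 * until that payload write lands. The sketch below only spells out how the
 * size returned by gaudi_get_wait_cb_size() above decomposes; the two
 * MSG_PROT packets are assumed to be reserved for the completion update that
 * is appended outside this file.
 */
static inline u32 gaudi_wait_cb_size_breakdown(void)
{
	return 3 * sizeof(struct packet_msg_short) +	/* monitor ADDRL/ADDRH/DATA */
	       1 * sizeof(struct packet_msg_short) +	/* arm the monitor */
	       sizeof(struct packet_fence) +		/* fence on counter 2 */
	       2 * sizeof(struct packet_msg_prot);	/* completion (assumption) */
}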
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) 8525 { 8526 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); 8527 } 8528 8529 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 8530 u32 size, bool eb) 8531 { 8532 struct hl_cb *cb = (struct hl_cb *) data; 8533 struct packet_msg_short *pkt; 8534 u32 value, ctl, pkt_size = sizeof(*pkt); 8535 8536 pkt = cb->kernel_address + size; 8537 memset(pkt, 0, pkt_size); 8538 8539 /* Inc by 1, Mode ADD */ 8540 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 8541 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 8542 8543 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 8544 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8545 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ 8546 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8547 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb); 8548 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8549 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8550 8551 pkt->value = cpu_to_le32(value); 8552 pkt->ctl = cpu_to_le32(ctl); 8553 8554 return size + pkt_size; 8555 } 8556 8557 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, 8558 u16 addr) 8559 { 8560 u32 ctl, pkt_size = sizeof(*pkt); 8561 8562 memset(pkt, 0, pkt_size); 8563 8564 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); 8565 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8566 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8567 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8568 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8569 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */ 8570 8571 pkt->value = cpu_to_le32(value); 8572 pkt->ctl = cpu_to_le32(ctl); 8573 8574 return pkt_size; 8575 } 8576 8577 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev, 8578 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask, 8579 u16 sob_val, u16 mon_id) 8580 { 8581 u64 monitor_base; 8582 u32 ctl, value, pkt_size = sizeof(*pkt); 8583 u16 msg_addr_offset; 8584 u8 mask; 8585 8586 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 8587 dev_err(hdev->dev, 8588 "sob_base %u (mask %#x) is not valid\n", 8589 sob_base, sob_mask); 8590 return 0; 8591 } 8592 8593 /* 8594 * monitor_base should be the content of the base0 address registers, 8595 * so it will be added to the msg short offsets 8596 */ 8597 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8598 8599 msg_addr_offset = 8600 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - 8601 monitor_base; 8602 8603 memset(pkt, 0, pkt_size); 8604 8605 /* Monitor config packet: bind the monitor to a sync object */ 8606 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 8607 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 8608 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, 8609 0); /* GREATER OR EQUAL*/ 8610 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); 8611 8612 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset); 8613 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8614 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8615 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8616 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8617 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8618 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8619 8620 pkt->value = 
cpu_to_le32(value); 8621 pkt->ctl = cpu_to_le32(ctl); 8622 8623 return pkt_size; 8624 } 8625 8626 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) 8627 { 8628 u32 ctl, cfg, pkt_size = sizeof(*pkt); 8629 8630 memset(pkt, 0, pkt_size); 8631 8632 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 8633 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 8634 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2); 8635 8636 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 8637 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8638 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8639 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8640 8641 pkt->cfg = cpu_to_le32(cfg); 8642 pkt->ctl = cpu_to_le32(ctl); 8643 8644 return pkt_size; 8645 } 8646 8647 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr) 8648 { 8649 u32 offset, nic_index; 8650 8651 switch (queue_id) { 8652 case GAUDI_QUEUE_ID_DMA_0_0: 8653 offset = mmDMA0_QM_CP_FENCE2_RDATA_0; 8654 break; 8655 case GAUDI_QUEUE_ID_DMA_0_1: 8656 offset = mmDMA0_QM_CP_FENCE2_RDATA_1; 8657 break; 8658 case GAUDI_QUEUE_ID_DMA_0_2: 8659 offset = mmDMA0_QM_CP_FENCE2_RDATA_2; 8660 break; 8661 case GAUDI_QUEUE_ID_DMA_0_3: 8662 offset = mmDMA0_QM_CP_FENCE2_RDATA_3; 8663 break; 8664 case GAUDI_QUEUE_ID_DMA_1_0: 8665 offset = mmDMA1_QM_CP_FENCE2_RDATA_0; 8666 break; 8667 case GAUDI_QUEUE_ID_DMA_1_1: 8668 offset = mmDMA1_QM_CP_FENCE2_RDATA_1; 8669 break; 8670 case GAUDI_QUEUE_ID_DMA_1_2: 8671 offset = mmDMA1_QM_CP_FENCE2_RDATA_2; 8672 break; 8673 case GAUDI_QUEUE_ID_DMA_1_3: 8674 offset = mmDMA1_QM_CP_FENCE2_RDATA_3; 8675 break; 8676 case GAUDI_QUEUE_ID_DMA_5_0: 8677 offset = mmDMA5_QM_CP_FENCE2_RDATA_0; 8678 break; 8679 case GAUDI_QUEUE_ID_DMA_5_1: 8680 offset = mmDMA5_QM_CP_FENCE2_RDATA_1; 8681 break; 8682 case GAUDI_QUEUE_ID_DMA_5_2: 8683 offset = mmDMA5_QM_CP_FENCE2_RDATA_2; 8684 break; 8685 case GAUDI_QUEUE_ID_DMA_5_3: 8686 offset = mmDMA5_QM_CP_FENCE2_RDATA_3; 8687 break; 8688 case GAUDI_QUEUE_ID_TPC_7_0: 8689 offset = mmTPC7_QM_CP_FENCE2_RDATA_0; 8690 break; 8691 case GAUDI_QUEUE_ID_TPC_7_1: 8692 offset = mmTPC7_QM_CP_FENCE2_RDATA_1; 8693 break; 8694 case GAUDI_QUEUE_ID_TPC_7_2: 8695 offset = mmTPC7_QM_CP_FENCE2_RDATA_2; 8696 break; 8697 case GAUDI_QUEUE_ID_TPC_7_3: 8698 offset = mmTPC7_QM_CP_FENCE2_RDATA_3; 8699 break; 8700 case GAUDI_QUEUE_ID_NIC_0_0: 8701 case GAUDI_QUEUE_ID_NIC_1_0: 8702 case GAUDI_QUEUE_ID_NIC_2_0: 8703 case GAUDI_QUEUE_ID_NIC_3_0: 8704 case GAUDI_QUEUE_ID_NIC_4_0: 8705 case GAUDI_QUEUE_ID_NIC_5_0: 8706 case GAUDI_QUEUE_ID_NIC_6_0: 8707 case GAUDI_QUEUE_ID_NIC_7_0: 8708 case GAUDI_QUEUE_ID_NIC_8_0: 8709 case GAUDI_QUEUE_ID_NIC_9_0: 8710 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2; 8711 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 + 8712 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8713 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8714 break; 8715 case GAUDI_QUEUE_ID_NIC_0_1: 8716 case GAUDI_QUEUE_ID_NIC_1_1: 8717 case GAUDI_QUEUE_ID_NIC_2_1: 8718 case GAUDI_QUEUE_ID_NIC_3_1: 8719 case GAUDI_QUEUE_ID_NIC_4_1: 8720 case GAUDI_QUEUE_ID_NIC_5_1: 8721 case GAUDI_QUEUE_ID_NIC_6_1: 8722 case GAUDI_QUEUE_ID_NIC_7_1: 8723 case GAUDI_QUEUE_ID_NIC_8_1: 8724 case GAUDI_QUEUE_ID_NIC_9_1: 8725 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2; 8726 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 + 8727 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8728 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8729 break; 8730 case GAUDI_QUEUE_ID_NIC_0_2: 8731 case GAUDI_QUEUE_ID_NIC_1_2: 8732 case GAUDI_QUEUE_ID_NIC_2_2: 8733 case 
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
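/*
 * A wait CB (see gaudi_gen_wait_cb() below) is built from the following
 * packets, in order: three MSG_SHORTs that program the monitor payload
 * address (low, high) and payload data, a fourth MSG_SHORT that arms the
 * monitor on the requested SOB group, and a FENCE packet that blocks the
 * queue until the monitor fires and writes the payload to the fence
 * register.
 */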
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
						hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}
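/*
 * The state dump code needs to resolve a sync object back to the engine
 * that signals it. The helpers below read the SO address configured in
 * each engine and hash it (as an offset from CFG_BASE) to an engine
 * type/id pair.
 */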
static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}

static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}
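/*
 * Illustrative example (arbitrary values): a monitor armed with group id 3
 * and mask 0b11111100 is watching the two cleared bits, i.e. sync objects
 * 24 and 25.
 */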
static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}

static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}

static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}

static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
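/*
 * Register the Gaudi-specific id-to-name tables and the callbacks above
 * with the common state dump infrastructure.
 */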
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
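/* ASIC-specific callbacks used by the common habanalabs driver core */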
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}