1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2016-2022 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8 #include "gaudiP.h" 9 #include "../include/hw_ip/mmu/mmu_general.h" 10 #include "../include/hw_ip/mmu/mmu_v1_1.h" 11 #include "../include/gaudi/gaudi_masks.h" 12 #include "../include/gaudi/gaudi_fw_if.h" 13 #include "../include/gaudi/gaudi_reg_map.h" 14 #include "../include/gaudi/gaudi_async_ids_map_extended.h" 15 16 #include <linux/module.h> 17 #include <linux/pci.h> 18 #include <linux/firmware.h> 19 #include <linux/hwmon.h> 20 #include <linux/iommu.h> 21 #include <linux/seq_file.h> 22 23 /* 24 * Gaudi security scheme: 25 * 26 * 1. Host is protected by: 27 * - Range registers 28 * - MMU 29 * 30 * 2. DDR is protected by: 31 * - Range registers (protect the first 512MB) 32 * 33 * 3. Configuration is protected by: 34 * - Range registers 35 * - Protection bits 36 * 37 * MMU is always enabled. 38 * 39 * QMAN DMA channels 0,1 (PCI DMAN): 40 * - DMA is not secured. 41 * - PQ and CQ are secured. 42 * - CP is secured: The driver needs to parse CB but WREG should be allowed 43 * because of TDMA (tensor DMA). Hence, WREG is always not 44 * secured. 45 * 46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA 47 * channel 0 to be secured, execute the DMA and change it back to not secured. 48 * Currently, the driver doesn't use the DMA while there are compute jobs 49 * running. 50 * 51 * The current use cases for the driver to use the DMA are: 52 * - Clear SRAM on context switch (happens on context switch when device is 53 * idle) 54 * - MMU page tables area clear (happens on init) 55 * 56 * QMAN DMA 2-7, TPC, MME, NIC: 57 * PQ is secured and is located on the Host (HBM CON TPC3 bug) 58 * CQ, CP and the engine are not secured 59 * 60 */ 61 62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb" 63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb" 64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin" 65 66 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ 67 68 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */ 69 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */ 70 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */ 71 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ 72 73 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ 74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */ 75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */ 76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) 77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) 78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) 79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */ 80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ 81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */ 82 83 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9 84 85 #define GAUDI_MAX_STRING_LEN 20 86 87 #define GAUDI_CB_POOL_CB_CNT 512 88 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */ 89 90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3 91 92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20 93 94 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16 95 96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3 97 98 #define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */ 99 100 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */ 101 102 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010") 103 104 #define MONITOR_SOB_STRING_SIZE 256 105 106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = { 107 GAUDI_QUEUE_ID_DMA_0_0, 108 
GAUDI_QUEUE_ID_DMA_0_1, 109 GAUDI_QUEUE_ID_DMA_0_2, 110 GAUDI_QUEUE_ID_DMA_0_3, 111 GAUDI_QUEUE_ID_DMA_1_0, 112 GAUDI_QUEUE_ID_DMA_1_1, 113 GAUDI_QUEUE_ID_DMA_1_2, 114 GAUDI_QUEUE_ID_DMA_1_3 115 }; 116 117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { 118 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", 119 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", 120 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3", 121 "gaudi cpu eq" 122 }; 123 124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { 125 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0, 126 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1, 127 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2, 128 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3, 129 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4, 130 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5, 131 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6, 132 [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7 133 }; 134 135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = { 136 [0] = GAUDI_QUEUE_ID_DMA_0_0, 137 [1] = GAUDI_QUEUE_ID_DMA_0_1, 138 [2] = GAUDI_QUEUE_ID_DMA_0_2, 139 [3] = GAUDI_QUEUE_ID_DMA_0_3, 140 [4] = GAUDI_QUEUE_ID_DMA_1_0, 141 [5] = GAUDI_QUEUE_ID_DMA_1_1, 142 [6] = GAUDI_QUEUE_ID_DMA_1_2, 143 [7] = GAUDI_QUEUE_ID_DMA_1_3, 144 }; 145 146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = { 147 [PACKET_WREG_32] = sizeof(struct packet_wreg32), 148 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk), 149 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long), 150 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short), 151 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma), 152 [PACKET_REPEAT] = sizeof(struct packet_repeat), 153 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot), 154 [PACKET_FENCE] = sizeof(struct packet_fence), 155 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma), 156 [PACKET_NOP] = sizeof(struct packet_nop), 157 [PACKET_STOP] = sizeof(struct packet_stop), 158 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point), 159 [PACKET_WAIT] = sizeof(struct packet_wait), 160 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe) 161 }; 162 163 static inline bool validate_packet_id(enum packet_id id) 164 { 165 switch (id) { 166 case PACKET_WREG_32: 167 case PACKET_WREG_BULK: 168 case PACKET_MSG_LONG: 169 case PACKET_MSG_SHORT: 170 case PACKET_CP_DMA: 171 case PACKET_REPEAT: 172 case PACKET_MSG_PROT: 173 case PACKET_FENCE: 174 case PACKET_LIN_DMA: 175 case PACKET_NOP: 176 case PACKET_STOP: 177 case PACKET_ARB_POINT: 178 case PACKET_WAIT: 179 case PACKET_LOAD_AND_EXE: 180 return true; 181 default: 182 return false; 183 } 184 } 185 186 static const char * const 187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = { 188 "tpc_address_exceed_slm", 189 "tpc_div_by_0", 190 "tpc_spu_mac_overflow", 191 "tpc_spu_addsub_overflow", 192 "tpc_spu_abs_overflow", 193 "tpc_spu_fp_dst_nan_inf", 194 "tpc_spu_fp_dst_denorm", 195 "tpc_vpu_mac_overflow", 196 "tpc_vpu_addsub_overflow", 197 "tpc_vpu_abs_overflow", 198 "tpc_vpu_fp_dst_nan_inf", 199 "tpc_vpu_fp_dst_denorm", 200 "tpc_assertions", 201 "tpc_illegal_instruction", 202 "tpc_pc_wrap_around", 203 "tpc_qm_sw_err", 204 "tpc_hbw_rresp_err", 205 "tpc_hbw_bresp_err", 206 "tpc_lbw_rresp_err", 207 "tpc_lbw_bresp_err" 208 }; 209 210 static const char * const 211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = { 212 "PQ AXI HBW error", 213 "CQ AXI HBW error", 214 "CP AXI HBW error", 215 "CP error due to undefined OPCODE", 216 "CP encountered STOP OPCODE", 217 "CP AXI LBW error", 218 "CP WRREG32 
or WRBULK returned error", 219 "N/A", 220 "FENCE 0 inc over max value and clipped", 221 "FENCE 1 inc over max value and clipped", 222 "FENCE 2 inc over max value and clipped", 223 "FENCE 3 inc over max value and clipped", 224 "FENCE 0 dec under min value and clipped", 225 "FENCE 1 dec under min value and clipped", 226 "FENCE 2 dec under min value and clipped", 227 "FENCE 3 dec under min value and clipped" 228 }; 229 230 static const char * const 231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { 232 "Choice push while full error", 233 "Choice Q watchdog error", 234 "MSG AXI LBW returned with error" 235 }; 236 237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { 238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ 239 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ 240 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */ 241 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */ 242 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */ 243 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */ 244 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */ 245 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */ 246 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */ 247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */ 248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */ 249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */ 250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */ 251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */ 252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */ 253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */ 254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */ 255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */ 256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */ 257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */ 258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */ 259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */ 260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */ 261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */ 262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */ 263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */ 264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */ 265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */ 266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */ 267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */ 268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */ 269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */ 270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */ 271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ 272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ 273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ 274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ 275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */ 276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ 277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ 278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ 279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ 280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ 281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ 282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ 283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ 284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ 285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ 286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ 287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ 288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ 289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */ 290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ 291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ 292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ 293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ 294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ 295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ 296 
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ 297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ 298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ 299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */ 300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */ 301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */ 302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */ 303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */ 304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */ 305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */ 306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */ 307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */ 308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */ 309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */ 310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */ 311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */ 312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */ 313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */ 314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */ 315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */ 316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */ 317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */ 318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */ 319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */ 320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */ 321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */ 322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */ 323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */ 324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */ 325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */ 326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */ 327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */ 328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */ 329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */ 330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */ 331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */ 332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */ 333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */ 334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */ 335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */ 336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */ 337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */ 338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */ 339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */ 340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */ 341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */ 342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */ 343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */ 344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */ 345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */ 346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */ 347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */ 348 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */ 349 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */ 350 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */ 351 }; 352 353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = { 354 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" }, 355 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" }, 356 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" }, 357 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" }, 358 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" }, 359 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" }, 360 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" }, 361 { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" }, 362 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" }, 363 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" }, 364 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" }, 365 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" }, 366 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" }, 367 { .id = 13, .name = 
"SYNC_OBJ_ENGINE_SEM_TPC_3" }, 368 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" }, 369 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" }, 370 { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" }, 371 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" }, 372 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" }, 373 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" }, 374 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" }, 375 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" }, 376 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" }, 377 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" }, 378 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" }, 379 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" }, 380 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" }, 381 }; 382 383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = { 384 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" }, 385 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" }, 386 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" }, 387 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" }, 388 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" }, 389 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" }, 390 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" }, 391 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" }, 392 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" }, 393 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" }, 394 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" }, 395 }; 396 397 static s64 gaudi_state_dump_specs_props[] = { 398 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, 399 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL, 400 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK, 401 [SP_MON_OBJ_WR_ADDR_LOW] = 402 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0, 403 [SP_MON_OBJ_WR_ADDR_HIGH] = 404 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0, 405 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0, 406 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0, 407 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0, 408 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK, 409 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0, 410 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR, 411 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0, 412 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0, 413 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL, 414 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0, 415 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0, 416 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO, 417 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0, 418 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES, 419 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES, 420 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES, 421 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES, 422 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES, 423 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS, 424 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES, 425 [SP_FENCE0_CNT_OFFSET] = 426 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0, 427 [SP_FENCE0_RDATA_OFFSET] = 428 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0, 429 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0, 430 [SP_NUM_CORES] = 1, 431 }; 432 433 static const int gaudi_queue_id_to_engine_id[] = { 434 [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0, 435 [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1, 436 [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE, 437 [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2, 
438 [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3, 439 [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4, 440 [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5, 441 [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6, 442 [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7, 443 [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0, 444 [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2, 445 [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0, 446 [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1, 447 [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2, 448 [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3, 449 [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4, 450 [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5, 451 [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6, 452 [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7, 453 [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0, 454 [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1, 455 [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2, 456 [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3, 457 [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4, 458 [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5, 459 [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6, 460 [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7, 461 [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8, 462 [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9, 463 }; 464 465 /* The order here is opposite to the order of the indexing in the h/w. 466 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc. 
467 */ 468 static const char * const gaudi_sync_manager_names[] = { 469 "SYNC_MGR_E_N", 470 "SYNC_MGR_W_N", 471 "SYNC_MGR_E_S", 472 "SYNC_MGR_W_S", 473 NULL 474 }; 475 476 struct ecc_info_extract_params { 477 u64 block_address; 478 u32 num_memories; 479 bool derr; 480 }; 481 482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, 483 u64 phys_addr); 484 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 485 struct hl_cs_job *job); 486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 487 u32 size, u64 val); 488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 489 u32 num_regs, u32 val); 490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 491 u32 tpc_id); 492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev); 493 static int gaudi_cpucp_info_get(struct hl_device *hdev); 494 static void gaudi_disable_clock_gating(struct hl_device *hdev); 495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); 496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 497 u32 size, bool eb); 498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev, 499 struct hl_gen_wait_properties *prop); 500 static inline enum hl_collective_mode 501 get_collective_mode(struct hl_device *hdev, u32 queue_id) 502 { 503 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT) 504 return HL_COLLECTIVE_MASTER; 505 506 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 && 507 queue_id <= GAUDI_QUEUE_ID_DMA_5_3) 508 return HL_COLLECTIVE_SLAVE; 509 510 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 && 511 queue_id <= GAUDI_QUEUE_ID_TPC_7_3) 512 return HL_COLLECTIVE_SLAVE; 513 514 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 && 515 queue_id <= GAUDI_QUEUE_ID_NIC_9_3) 516 return HL_COLLECTIVE_SLAVE; 517 518 return HL_COLLECTIVE_NOT_SUPPORTED; 519 } 520 521 static inline void set_default_power_values(struct hl_device *hdev) 522 { 523 struct asic_fixed_properties *prop = &hdev->asic_prop; 524 525 if (hdev->card_type == cpucp_card_type_pmc) { 526 prop->max_power_default = MAX_POWER_DEFAULT_PMC; 527 528 if (prop->fw_security_enabled) 529 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC; 530 else 531 prop->dc_power_default = DC_POWER_DEFAULT_PMC; 532 } else { 533 prop->max_power_default = MAX_POWER_DEFAULT_PCI; 534 prop->dc_power_default = DC_POWER_DEFAULT_PCI; 535 } 536 } 537 538 static int gaudi_set_fixed_properties(struct hl_device *hdev) 539 { 540 struct asic_fixed_properties *prop = &hdev->asic_prop; 541 u32 num_sync_stream_queues = 0; 542 int i; 543 544 prop->max_queues = GAUDI_QUEUE_ID_SIZE; 545 prop->hw_queues_props = kcalloc(prop->max_queues, 546 sizeof(struct hw_queue_properties), 547 GFP_KERNEL); 548 549 if (!prop->hw_queues_props) 550 return -ENOMEM; 551 552 for (i = 0 ; i < prop->max_queues ; i++) { 553 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) { 554 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; 555 prop->hw_queues_props[i].driver_only = 0; 556 prop->hw_queues_props[i].supports_sync_stream = 1; 557 prop->hw_queues_props[i].cb_alloc_flags = 558 CB_ALLOC_KERNEL; 559 num_sync_stream_queues++; 560 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) { 561 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; 562 prop->hw_queues_props[i].driver_only = 1; 563 prop->hw_queues_props[i].supports_sync_stream = 0; 564 prop->hw_queues_props[i].cb_alloc_flags = 565 CB_ALLOC_KERNEL; 566 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) { 567 prop->hw_queues_props[i].type = QUEUE_TYPE_INT; 568 prop->hw_queues_props[i].driver_only = 
0; 569 prop->hw_queues_props[i].supports_sync_stream = 0; 570 prop->hw_queues_props[i].cb_alloc_flags = 571 CB_ALLOC_USER; 572 573 } 574 prop->hw_queues_props[i].collective_mode = 575 get_collective_mode(hdev, i); 576 } 577 578 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE; 579 prop->cfg_base_address = CFG_BASE; 580 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE; 581 prop->host_base_address = HOST_PHYS_BASE; 582 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE; 583 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; 584 prop->completion_mode = HL_COMPLETION_MODE_JOB; 585 prop->collective_first_sob = 0; 586 prop->collective_first_mon = 0; 587 588 /* 2 SOBs per internal queue stream are reserved for collective */ 589 prop->sync_stream_first_sob = 590 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR) 591 * QMAN_STREAMS * HL_RSVD_SOBS; 592 593 /* 1 monitor per internal queue stream are reserved for collective 594 * 2 monitors per external queue stream are reserved for collective 595 */ 596 prop->sync_stream_first_mon = 597 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) + 598 (NUMBER_OF_EXT_HW_QUEUES * 2); 599 600 prop->dram_base_address = DRAM_PHYS_BASE; 601 prop->dram_size = GAUDI_HBM_SIZE_32GB; 602 prop->dram_end_address = prop->dram_base_address + prop->dram_size; 603 prop->dram_user_base_address = DRAM_BASE_ADDR_USER; 604 605 prop->sram_base_address = SRAM_BASE_ADDR; 606 prop->sram_size = SRAM_SIZE; 607 prop->sram_end_address = prop->sram_base_address + prop->sram_size; 608 prop->sram_user_base_address = 609 prop->sram_base_address + SRAM_USER_BASE_OFFSET; 610 611 prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR; 612 prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE; 613 614 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR; 615 if (hdev->pldm) 616 prop->mmu_pgt_size = 0x800000; /* 8MB */ 617 else 618 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; 619 prop->mmu_pte_size = HL_PTE_SIZE; 620 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; 621 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; 622 prop->dram_page_size = PAGE_SIZE_2MB; 623 prop->device_mem_alloc_default_page_size = prop->dram_page_size; 624 prop->dram_supports_virtual_memory = false; 625 626 prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT; 627 prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT; 628 prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT; 629 prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT; 630 prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT; 631 prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK; 632 prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK; 633 prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK; 634 prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK; 635 prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK; 636 prop->pmmu.start_addr = VA_HOST_SPACE_START; 637 prop->pmmu.end_addr = 638 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; 639 prop->pmmu.page_size = PAGE_SIZE_4KB; 640 prop->pmmu.num_hops = MMU_ARCH_5_HOPS; 641 prop->pmmu.last_mask = LAST_MASK; 642 /* TODO: will be duplicated until implementing per-MMU props */ 643 prop->pmmu.hop_table_size = prop->mmu_hop_table_size; 644 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 645 646 /* PMMU and HPMMU are the same except of page size */ 647 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 648 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 649 650 /* shifts and masks are the same in PMMU and DMMU */ 651 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu)); 652 
prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2); 653 prop->dmmu.end_addr = VA_HOST_SPACE_END; 654 prop->dmmu.page_size = PAGE_SIZE_2MB; 655 656 prop->cfg_size = CFG_SIZE; 657 prop->max_asid = MAX_ASID; 658 prop->num_of_events = GAUDI_EVENT_SIZE; 659 prop->tpc_enabled_mask = TPC_ENABLED_MASK; 660 661 set_default_power_values(hdev); 662 663 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT; 664 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; 665 666 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; 667 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 668 669 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 670 CARD_NAME_MAX_LEN); 671 672 prop->max_pending_cs = GAUDI_MAX_PENDING_CS; 673 674 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] = 675 prop->sync_stream_first_sob + 676 (num_sync_stream_queues * HL_RSVD_SOBS); 677 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] = 678 prop->sync_stream_first_mon + 679 (num_sync_stream_queues * HL_RSVD_MONS); 680 681 prop->first_available_user_interrupt = USHRT_MAX; 682 683 for (i = 0 ; i < HL_MAX_DCORES ; i++) 684 prop->first_available_cq[i] = USHRT_MAX; 685 686 prop->fw_cpu_boot_dev_sts0_valid = false; 687 prop->fw_cpu_boot_dev_sts1_valid = false; 688 prop->hard_reset_done_by_fw = false; 689 prop->gic_interrupts_enable = true; 690 691 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 692 693 prop->clk_pll_index = HL_GAUDI_MME_PLL; 694 prop->max_freq_value = GAUDI_MAX_CLK_FREQ; 695 696 prop->use_get_power_for_reset_history = true; 697 698 prop->configurable_stop_on_err = true; 699 700 prop->set_max_power_on_device_init = true; 701 702 prop->dma_mask = 48; 703 704 return 0; 705 } 706 707 static int gaudi_pci_bars_map(struct hl_device *hdev) 708 { 709 static const char * const name[] = {"SRAM", "CFG", "HBM"}; 710 bool is_wc[3] = {false, false, true}; 711 int rc; 712 713 rc = hl_pci_bars_map(hdev, name, is_wc); 714 if (rc) 715 return rc; 716 717 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] + 718 (CFG_BASE - SPI_FLASH_BASE_ADDR); 719 720 return 0; 721 } 722 723 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 724 { 725 struct gaudi_device *gaudi = hdev->asic_specific; 726 struct hl_inbound_pci_region pci_region; 727 u64 old_addr = addr; 728 int rc; 729 730 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr)) 731 return old_addr; 732 733 if (hdev->asic_prop.iatu_done_by_fw) 734 return U64_MAX; 735 736 /* Inbound Region 2 - Bar 4 - Point to HBM */ 737 pci_region.mode = PCI_BAR_MATCH_MODE; 738 pci_region.bar = HBM_BAR_ID; 739 pci_region.addr = addr; 740 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 741 if (rc) 742 return U64_MAX; 743 744 if (gaudi) { 745 old_addr = gaudi->hbm_bar_cur_addr; 746 gaudi->hbm_bar_cur_addr = addr; 747 } 748 749 return old_addr; 750 } 751 752 static int gaudi_init_iatu(struct hl_device *hdev) 753 { 754 struct hl_inbound_pci_region inbound_region; 755 struct hl_outbound_pci_region outbound_region; 756 int rc; 757 758 if (hdev->asic_prop.iatu_done_by_fw) 759 return 0; 760 761 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */ 762 inbound_region.mode = PCI_BAR_MATCH_MODE; 763 inbound_region.bar = SRAM_BAR_ID; 764 inbound_region.addr = SRAM_BASE_ADDR; 765 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 766 if (rc) 767 goto done; 768 769 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */ 770 inbound_region.mode = PCI_BAR_MATCH_MODE; 771 inbound_region.bar = CFG_BAR_ID; 772 inbound_region.addr = SPI_FLASH_BASE_ADDR; 773 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 
774 if (rc) 775 goto done; 776 777 /* Inbound Region 2 - Bar 4 - Point to HBM */ 778 inbound_region.mode = PCI_BAR_MATCH_MODE; 779 inbound_region.bar = HBM_BAR_ID; 780 inbound_region.addr = DRAM_PHYS_BASE; 781 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 782 if (rc) 783 goto done; 784 785 /* Outbound Region 0 - Point to Host */ 786 outbound_region.addr = HOST_PHYS_BASE; 787 outbound_region.size = HOST_PHYS_SIZE; 788 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 789 790 done: 791 return rc; 792 } 793 794 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev) 795 { 796 return RREG32(mmHW_STATE); 797 } 798 799 static int gaudi_early_init(struct hl_device *hdev) 800 { 801 struct asic_fixed_properties *prop = &hdev->asic_prop; 802 struct pci_dev *pdev = hdev->pdev; 803 resource_size_t pci_bar_size; 804 u32 fw_boot_status; 805 int rc; 806 807 rc = gaudi_set_fixed_properties(hdev); 808 if (rc) { 809 dev_err(hdev->dev, "Failed setting fixed properties\n"); 810 return rc; 811 } 812 813 /* Check BAR sizes */ 814 pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID); 815 816 if (pci_bar_size != SRAM_BAR_SIZE) { 817 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 818 SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE); 819 rc = -ENODEV; 820 goto free_queue_props; 821 } 822 823 pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID); 824 825 if (pci_bar_size != CFG_BAR_SIZE) { 826 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 827 CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE); 828 rc = -ENODEV; 829 goto free_queue_props; 830 } 831 832 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); 833 hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID); 834 835 /* If FW security is enabled at this point it means no access to ELBI */ 836 if (hdev->asic_prop.fw_security_enabled) { 837 hdev->asic_prop.iatu_done_by_fw = true; 838 839 /* 840 * GIC-security-bit can ONLY be set by CPUCP, so in this stage 841 * decision can only be taken based on PCI ID security. 
842 */ 843 hdev->asic_prop.gic_interrupts_enable = false; 844 goto pci_init; 845 } 846 847 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, 848 &fw_boot_status); 849 if (rc) 850 goto free_queue_props; 851 852 /* Check whether FW is configuring iATU */ 853 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) && 854 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN)) 855 hdev->asic_prop.iatu_done_by_fw = true; 856 857 pci_init: 858 rc = hl_pci_init(hdev); 859 if (rc) 860 goto free_queue_props; 861 862 /* Before continuing in the initialization, we need to read the preboot 863 * version to determine whether we run with a security-enabled firmware 864 */ 865 rc = hl_fw_read_preboot_status(hdev); 866 if (rc) { 867 if (hdev->reset_on_preboot_fail) 868 hdev->asic_funcs->hw_fini(hdev, true, false); 869 goto pci_fini; 870 } 871 872 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { 873 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); 874 hdev->asic_funcs->hw_fini(hdev, true, false); 875 } 876 877 return 0; 878 879 pci_fini: 880 hl_pci_fini(hdev); 881 free_queue_props: 882 kfree(hdev->asic_prop.hw_queues_props); 883 return rc; 884 } 885 886 static int gaudi_early_fini(struct hl_device *hdev) 887 { 888 kfree(hdev->asic_prop.hw_queues_props); 889 hl_pci_fini(hdev); 890 891 return 0; 892 } 893 894 /** 895 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values 896 * 897 * @hdev: pointer to hl_device structure 898 * 899 */ 900 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) 901 { 902 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; 903 struct asic_fixed_properties *prop = &hdev->asic_prop; 904 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; 905 int rc; 906 907 if ((hdev->fw_components & FW_TYPE_LINUX) && 908 (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) { 909 struct gaudi_device *gaudi = hdev->asic_specific; 910 911 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 912 return 0; 913 914 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr); 915 916 if (rc) 917 return rc; 918 919 freq = pll_freq_arr[2]; 920 } else { 921 /* Backward compatibility */ 922 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2); 923 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2); 924 nr = RREG32(mmPSOC_CPU_PLL_NR); 925 nf = RREG32(mmPSOC_CPU_PLL_NF); 926 od = RREG32(mmPSOC_CPU_PLL_OD); 927 928 if (div_sel == DIV_SEL_REF_CLK || 929 div_sel == DIV_SEL_DIVIDED_REF) { 930 if (div_sel == DIV_SEL_REF_CLK) 931 freq = PLL_REF_CLK; 932 else 933 freq = PLL_REF_CLK / (div_fctr + 1); 934 } else if (div_sel == DIV_SEL_PLL_CLK || 935 div_sel == DIV_SEL_DIVIDED_PLL) { 936 pll_clk = PLL_REF_CLK * (nf + 1) / 937 ((nr + 1) * (od + 1)); 938 if (div_sel == DIV_SEL_PLL_CLK) 939 freq = pll_clk; 940 else 941 freq = pll_clk / (div_fctr + 1); 942 } else { 943 dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel); 944 freq = 0; 945 } 946 } 947 948 prop->psoc_timestamp_frequency = freq; 949 prop->psoc_pci_pll_nr = nr; 950 prop->psoc_pci_pll_nf = nf; 951 prop->psoc_pci_pll_od = od; 952 prop->psoc_pci_pll_div_factor = div_fctr; 953 954 return 0; 955 } 956 957 static int _gaudi_init_tpc_mem(struct hl_device *hdev, 958 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size) 959 { 960 struct asic_fixed_properties *prop = &hdev->asic_prop; 961 struct packet_lin_dma *init_tpc_mem_pkt; 962 struct hl_cs_job *job; 963 struct hl_cb *cb; 964 u64 dst_addr; 965 u32 cb_size, ctl; 966 u8 tpc_id; 967 int rc; 968 969 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 
970 if (!cb) 971 return -EFAULT; 972 973 init_tpc_mem_pkt = cb->kernel_address; 974 cb_size = sizeof(*init_tpc_mem_pkt); 975 memset(init_tpc_mem_pkt, 0, cb_size); 976 977 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size); 978 979 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 980 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 981 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 982 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 983 984 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl); 985 986 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr); 987 988 /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */ 989 dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK, 990 round_up(prop->sram_user_base_address, SZ_8K)); 991 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr); 992 993 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 994 if (!job) { 995 dev_err(hdev->dev, "Failed to allocate a new job\n"); 996 rc = -ENOMEM; 997 goto release_cb; 998 } 999 1000 job->id = 0; 1001 job->user_cb = cb; 1002 atomic_inc(&job->user_cb->cs_cnt); 1003 job->user_cb_size = cb_size; 1004 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 1005 job->patched_cb = job->user_cb; 1006 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 1007 1008 hl_debugfs_add_job(hdev, job); 1009 1010 rc = gaudi_send_job_on_qman0(hdev, job); 1011 1012 if (rc) 1013 goto free_job; 1014 1015 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 1016 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id); 1017 if (rc) 1018 break; 1019 } 1020 1021 free_job: 1022 hl_userptr_delete_list(hdev, &job->userptr_list); 1023 hl_debugfs_remove_job(hdev, job); 1024 kfree(job); 1025 atomic_dec(&cb->cs_cnt); 1026 1027 release_cb: 1028 hl_cb_put(cb); 1029 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 1030 1031 return rc; 1032 } 1033 1034 /* 1035 * gaudi_init_tpc_mem() - Initialize TPC memories. 1036 * @hdev: Pointer to hl_device structure. 1037 * 1038 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories. 1039 * 1040 * Return: 0 for success, negative value for error. 
1041 */ 1042 static int gaudi_init_tpc_mem(struct hl_device *hdev) 1043 { 1044 const struct firmware *fw; 1045 size_t fw_size; 1046 void *cpu_addr; 1047 dma_addr_t dma_handle; 1048 int rc, count = 5; 1049 1050 again: 1051 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); 1052 if (rc == -EINTR && count-- > 0) { 1053 msleep(50); 1054 goto again; 1055 } 1056 1057 if (rc) { 1058 dev_err(hdev->dev, "Failed to load firmware file %s\n", 1059 GAUDI_TPC_FW_FILE); 1060 goto out; 1061 } 1062 1063 fw_size = fw->size; 1064 cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO); 1065 if (!cpu_addr) { 1066 dev_err(hdev->dev, 1067 "Failed to allocate %zu of dma memory for TPC kernel\n", 1068 fw_size); 1069 rc = -ENOMEM; 1070 goto out; 1071 } 1072 1073 memcpy(cpu_addr, fw->data, fw_size); 1074 1075 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size); 1076 1077 hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle); 1078 1079 out: 1080 release_firmware(fw); 1081 return rc; 1082 } 1083 1084 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream) 1085 { 1086 struct gaudi_device *gaudi = hdev->asic_specific; 1087 struct gaudi_collective_properties *prop = &gaudi->collective_props; 1088 struct hl_hw_queue *q; 1089 u32 i, sob_id, sob_group_id, queue_id; 1090 1091 /* Iterate through SOB groups and assign a SOB for each slave queue */ 1092 sob_group_id = 1093 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream]; 1094 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id; 1095 1096 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream; 1097 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 1098 q = &hdev->kernel_queues[queue_id + (4 * i)]; 1099 q->sync_stream_prop.collective_sob_id = sob_id + i; 1100 } 1101 1102 /* Both DMA5 and TPC7 use the same resources since only a single 1103 * engine need to participate in the reduction process 1104 */ 1105 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream; 1106 q = &hdev->kernel_queues[queue_id]; 1107 q->sync_stream_prop.collective_sob_id = 1108 sob_id + NIC_NUMBER_OF_ENGINES; 1109 1110 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream; 1111 q = &hdev->kernel_queues[queue_id]; 1112 q->sync_stream_prop.collective_sob_id = 1113 sob_id + NIC_NUMBER_OF_ENGINES; 1114 } 1115 1116 static void gaudi_sob_group_hw_reset(struct kref *ref) 1117 { 1118 struct gaudi_hw_sob_group *hw_sob_group = 1119 container_of(ref, struct gaudi_hw_sob_group, kref); 1120 struct hl_device *hdev = hw_sob_group->hdev; 1121 int i; 1122 1123 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++) 1124 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 1125 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0); 1126 1127 kref_init(&hw_sob_group->kref); 1128 } 1129 1130 static void gaudi_sob_group_reset_error(struct kref *ref) 1131 { 1132 struct gaudi_hw_sob_group *hw_sob_group = 1133 container_of(ref, struct gaudi_hw_sob_group, kref); 1134 struct hl_device *hdev = hw_sob_group->hdev; 1135 1136 dev_crit(hdev->dev, 1137 "SOB release shouldn't be called here, base_sob_id: %d\n", 1138 hw_sob_group->base_sob_id); 1139 } 1140 1141 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi) 1142 { 1143 struct gaudi_collective_properties *prop; 1144 int i; 1145 1146 prop = &gaudi->collective_props; 1147 1148 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask)); 1149 1150 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) 1151 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i)) 1152 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |= 1153 BIT(i % 
HL_MAX_SOBS_PER_MONITOR); 1154 /* Set collective engine bit */ 1155 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |= 1156 BIT(i % HL_MAX_SOBS_PER_MONITOR); 1157 } 1158 1159 static int gaudi_collective_init(struct hl_device *hdev) 1160 { 1161 u32 i, sob_id, reserved_sobs_per_group; 1162 struct gaudi_collective_properties *prop; 1163 struct gaudi_device *gaudi; 1164 1165 gaudi = hdev->asic_specific; 1166 prop = &gaudi->collective_props; 1167 sob_id = hdev->asic_prop.collective_first_sob; 1168 1169 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */ 1170 reserved_sobs_per_group = 1171 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR); 1172 1173 /* Init SOB groups */ 1174 for (i = 0 ; i < NUM_SOB_GROUPS; i++) { 1175 prop->hw_sob_group[i].hdev = hdev; 1176 prop->hw_sob_group[i].base_sob_id = sob_id; 1177 sob_id += reserved_sobs_per_group; 1178 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref); 1179 } 1180 1181 for (i = 0 ; i < QMAN_STREAMS; i++) { 1182 prop->next_sob_group_val[i] = 1; 1183 prop->curr_sob_group_idx[i] = 0; 1184 gaudi_collective_map_sobs(hdev, i); 1185 } 1186 1187 gaudi_collective_mstr_sob_mask_set(gaudi); 1188 1189 return 0; 1190 } 1191 1192 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group) 1193 { 1194 struct gaudi_device *gaudi = hdev->asic_specific; 1195 struct gaudi_collective_properties *cprop = &gaudi->collective_props; 1196 1197 kref_put(&cprop->hw_sob_group[sob_group].kref, 1198 gaudi_sob_group_hw_reset); 1199 } 1200 1201 static void gaudi_collective_master_init_job(struct hl_device *hdev, 1202 struct hl_cs_job *job, u32 stream, u32 sob_group_offset) 1203 { 1204 u32 master_sob_base, master_monitor, queue_id, cb_size = 0; 1205 struct gaudi_collective_properties *cprop; 1206 struct hl_gen_wait_properties wait_prop; 1207 struct hl_sync_stream_properties *prop; 1208 struct gaudi_device *gaudi; 1209 1210 gaudi = hdev->asic_specific; 1211 cprop = &gaudi->collective_props; 1212 queue_id = job->hw_queue_id; 1213 prop = &hdev->kernel_queues[queue_id].sync_stream_prop; 1214 1215 master_sob_base = 1216 cprop->hw_sob_group[sob_group_offset].base_sob_id; 1217 master_monitor = prop->collective_mstr_mon_id[0]; 1218 1219 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id; 1220 1221 dev_dbg(hdev->dev, 1222 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n", 1223 master_sob_base, cprop->mstr_sob_mask[0], 1224 cprop->next_sob_group_val[stream], 1225 master_monitor, queue_id); 1226 1227 wait_prop.data = (void *) job->patched_cb; 1228 wait_prop.sob_base = master_sob_base; 1229 wait_prop.sob_mask = cprop->mstr_sob_mask[0]; 1230 wait_prop.sob_val = cprop->next_sob_group_val[stream]; 1231 wait_prop.mon_id = master_monitor; 1232 wait_prop.q_idx = queue_id; 1233 wait_prop.size = cb_size; 1234 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1235 1236 master_sob_base += HL_MAX_SOBS_PER_MONITOR; 1237 master_monitor = prop->collective_mstr_mon_id[1]; 1238 1239 dev_dbg(hdev->dev, 1240 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n", 1241 master_sob_base, cprop->mstr_sob_mask[1], 1242 cprop->next_sob_group_val[stream], 1243 master_monitor, queue_id); 1244 1245 wait_prop.sob_base = master_sob_base; 1246 wait_prop.sob_mask = cprop->mstr_sob_mask[1]; 1247 wait_prop.mon_id = master_monitor; 1248 wait_prop.size = cb_size; 1249 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1250 } 1251 1252 static void gaudi_collective_slave_init_job(struct hl_device *hdev, 1253 struct hl_cs_job *job, struct hl_cs_compl 
*cs_cmpl) 1254 { 1255 struct hl_gen_wait_properties wait_prop; 1256 struct hl_sync_stream_properties *prop; 1257 u32 queue_id, cb_size = 0; 1258 1259 queue_id = job->hw_queue_id; 1260 prop = &hdev->kernel_queues[queue_id].sync_stream_prop; 1261 1262 if (job->cs->encaps_signals) { 1263 /* use the encaps signal handle store earlier in the flow 1264 * and set the SOB information from the encaps 1265 * signals handle 1266 */ 1267 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job, 1268 cs_cmpl); 1269 1270 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n", 1271 job->cs->sequence, 1272 cs_cmpl->hw_sob->sob_id, 1273 cs_cmpl->sob_val); 1274 } 1275 1276 /* Add to wait CBs using slave monitor */ 1277 wait_prop.data = (void *) job->user_cb; 1278 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id; 1279 wait_prop.sob_mask = 0x1; 1280 wait_prop.sob_val = cs_cmpl->sob_val; 1281 wait_prop.mon_id = prop->collective_slave_mon_id; 1282 wait_prop.q_idx = queue_id; 1283 wait_prop.size = cb_size; 1284 1285 dev_dbg(hdev->dev, 1286 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n", 1287 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, 1288 prop->collective_slave_mon_id, queue_id); 1289 1290 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1291 1292 dev_dbg(hdev->dev, 1293 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n", 1294 prop->collective_sob_id, queue_id); 1295 1296 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb, 1297 prop->collective_sob_id, cb_size, false); 1298 } 1299 1300 static int gaudi_collective_wait_init_cs(struct hl_cs *cs) 1301 { 1302 struct hl_cs_compl *signal_cs_cmpl = 1303 container_of(cs->signal_fence, struct hl_cs_compl, base_fence); 1304 struct hl_cs_compl *cs_cmpl = 1305 container_of(cs->fence, struct hl_cs_compl, base_fence); 1306 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; 1307 struct gaudi_collective_properties *cprop; 1308 u32 stream, queue_id, sob_group_offset; 1309 struct gaudi_device *gaudi; 1310 struct hl_device *hdev; 1311 struct hl_cs_job *job; 1312 struct hl_ctx *ctx; 1313 1314 ctx = cs->ctx; 1315 hdev = ctx->hdev; 1316 gaudi = hdev->asic_specific; 1317 cprop = &gaudi->collective_props; 1318 1319 if (cs->encaps_signals) { 1320 cs_cmpl->hw_sob = handle->hw_sob; 1321 /* at this checkpoint we only need the hw_sob pointer 1322 * for the completion check before start going over the jobs 1323 * of the master/slaves, the sob_value will be taken later on 1324 * in gaudi_collective_slave_init_job depends on each 1325 * job wait offset value. 1326 */ 1327 cs_cmpl->sob_val = 0; 1328 } else { 1329 /* copy the SOB id and value of the signal CS */ 1330 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; 1331 cs_cmpl->sob_val = signal_cs_cmpl->sob_val; 1332 } 1333 1334 /* check again if the signal cs already completed. 1335 * if yes then don't send any wait cs since the hw_sob 1336 * could be in reset already. if signal is not completed 1337 * then get refcount to hw_sob to prevent resetting the sob 1338 * while wait cs is not submitted. 1339 * note that this check is protected by two locks, 1340 * hw queue lock and completion object lock, 1341 * and the same completion object lock also protects 1342 * the hw_sob reset handler function. 1343 * The hw_queue lock prevent out of sync of hw_sob 1344 * refcount value, changed by signal/wait flows. 
1345 */ 1346 spin_lock(&signal_cs_cmpl->lock); 1347 1348 if (completion_done(&cs->signal_fence->completion)) { 1349 spin_unlock(&signal_cs_cmpl->lock); 1350 return -EINVAL; 1351 } 1352 /* Increment kref since all slave queues are now waiting on it */ 1353 kref_get(&cs_cmpl->hw_sob->kref); 1354 1355 spin_unlock(&signal_cs_cmpl->lock); 1356 1357 /* Calculate the stream from collective master queue (1st job) */ 1358 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node); 1359 stream = job->hw_queue_id % 4; 1360 sob_group_offset = 1361 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream]; 1362 1363 list_for_each_entry(job, &cs->job_list, cs_node) { 1364 queue_id = job->hw_queue_id; 1365 1366 if (hdev->kernel_queues[queue_id].collective_mode == 1367 HL_COLLECTIVE_MASTER) 1368 gaudi_collective_master_init_job(hdev, job, stream, 1369 sob_group_offset); 1370 else 1371 gaudi_collective_slave_init_job(hdev, job, cs_cmpl); 1372 } 1373 1374 cs_cmpl->sob_group = sob_group_offset; 1375 1376 /* Handle sob group kref and wraparound */ 1377 kref_get(&cprop->hw_sob_group[sob_group_offset].kref); 1378 cprop->next_sob_group_val[stream]++; 1379 1380 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) { 1381 /* 1382 * Decrement as we reached the max value. 1383 * The release function won't be called here as we've 1384 * just incremented the refcount. 1385 */ 1386 kref_put(&cprop->hw_sob_group[sob_group_offset].kref, 1387 gaudi_sob_group_reset_error); 1388 cprop->next_sob_group_val[stream] = 1; 1389 /* only two SOBs are currently in use */ 1390 cprop->curr_sob_group_idx[stream] = 1391 (cprop->curr_sob_group_idx[stream] + 1) & 1392 (HL_RSVD_SOBS - 1); 1393 1394 gaudi_collective_map_sobs(hdev, stream); 1395 1396 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n", 1397 cprop->curr_sob_group_idx[stream], stream); 1398 } 1399 1400 mb(); 1401 hl_fence_put(cs->signal_fence); 1402 cs->signal_fence = NULL; 1403 1404 return 0; 1405 } 1406 1407 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size) 1408 { 1409 u32 cacheline_end, additional_commands; 1410 1411 cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE); 1412 additional_commands = sizeof(struct packet_msg_prot) * 2; 1413 1414 if (user_cb_size + additional_commands > cacheline_end) 1415 return cacheline_end - user_cb_size + additional_commands; 1416 else 1417 return additional_commands; 1418 } 1419 1420 static int gaudi_collective_wait_create_job(struct hl_device *hdev, 1421 struct hl_ctx *ctx, struct hl_cs *cs, 1422 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id, 1423 u32 encaps_signal_offset) 1424 { 1425 struct hw_queue_properties *hw_queue_prop; 1426 struct hl_cs_counters_atomic *cntr; 1427 struct hl_cs_job *job; 1428 struct hl_cb *cb; 1429 u32 cb_size; 1430 bool patched_cb; 1431 1432 cntr = &hdev->aggregated_cs_counters; 1433 1434 if (mode == HL_COLLECTIVE_MASTER) { 1435 /* CB size of collective master queue contains 1436 * 4 msg short packets for monitor 1 configuration 1437 * 1 fence packet 1438 * 4 msg short packets for monitor 2 configuration 1439 * 1 fence packet 1440 * 2 msg prot packets for completion and MSI 1441 */ 1442 cb_size = sizeof(struct packet_msg_short) * 8 + 1443 sizeof(struct packet_fence) * 2 + 1444 sizeof(struct packet_msg_prot) * 2; 1445 patched_cb = true; 1446 } else { 1447 /* CB size of collective slave queues contains 1448 * 4 msg short packets for monitor configuration 1449 * 1 fence packet 1450 * 1 additional msg short packet for sob signal 1451 */ 1452 cb_size = sizeof(struct 
packet_msg_short) * 5 + 1453 sizeof(struct packet_fence); 1454 patched_cb = false; 1455 } 1456 1457 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id]; 1458 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true); 1459 if (!job) { 1460 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1461 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1462 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1463 return -ENOMEM; 1464 } 1465 1466 /* Allocate internal mapped CB for non patched CBs */ 1467 cb = hl_cb_kernel_create(hdev, cb_size, 1468 hdev->mmu_enable && !patched_cb); 1469 if (!cb) { 1470 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1471 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1472 kfree(job); 1473 return -EFAULT; 1474 } 1475 1476 job->id = 0; 1477 job->cs = cs; 1478 job->user_cb = cb; 1479 atomic_inc(&job->user_cb->cs_cnt); 1480 job->user_cb_size = cb_size; 1481 job->hw_queue_id = queue_id; 1482 1483 /* since its guaranteed to have only one chunk in the collective wait 1484 * cs, we can use this chunk to set the encapsulated signal offset 1485 * in the jobs. 1486 */ 1487 if (cs->encaps_signals) 1488 job->encaps_sig_wait_offset = encaps_signal_offset; 1489 1490 /* 1491 * No need in parsing, user CB is the patched CB. 1492 * We call hl_cb_destroy() out of two reasons - we don't need 1493 * the CB in the CB idr anymore and to decrement its refcount as 1494 * it was incremented inside hl_cb_kernel_create(). 1495 */ 1496 if (patched_cb) 1497 job->patched_cb = job->user_cb; 1498 else 1499 job->patched_cb = NULL; 1500 1501 job->job_cb_size = job->user_cb_size; 1502 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 1503 1504 /* increment refcount as for external queues we get completion */ 1505 if (hw_queue_prop->type == QUEUE_TYPE_EXT) 1506 cs_get(cs); 1507 1508 cs->jobs_in_queue_cnt[job->hw_queue_id]++; 1509 1510 list_add_tail(&job->cs_node, &cs->job_list); 1511 1512 hl_debugfs_add_job(hdev, job); 1513 1514 return 0; 1515 } 1516 1517 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev, 1518 struct hl_ctx *ctx, struct hl_cs *cs, 1519 u32 wait_queue_id, u32 collective_engine_id, 1520 u32 encaps_signal_offset) 1521 { 1522 struct gaudi_device *gaudi = hdev->asic_specific; 1523 struct hw_queue_properties *hw_queue_prop; 1524 u32 queue_id, collective_queue, num_jobs; 1525 u32 stream, nic_queue, nic_idx = 0; 1526 bool skip; 1527 int i, rc = 0; 1528 1529 /* Verify wait queue id is configured as master */ 1530 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id]; 1531 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { 1532 dev_err(hdev->dev, 1533 "Queue %d is not configured as collective master\n", 1534 wait_queue_id); 1535 return -EINVAL; 1536 } 1537 1538 /* Verify engine id is supported */ 1539 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 && 1540 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) { 1541 dev_err(hdev->dev, 1542 "Collective wait does not support engine %u\n", 1543 collective_engine_id); 1544 return -EINVAL; 1545 } 1546 1547 stream = wait_queue_id % 4; 1548 1549 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5) 1550 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream; 1551 else 1552 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream; 1553 1554 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1; 1555 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream; 1556 1557 /* First job goes to the collective master queue, it will wait for 1558 * the collective slave queues to finish execution. 
1559 * The synchronization is done using two monitors: 1560 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the 1561 * reduction engine (DMA5/TPC7). 1562 * 1563 * Rest of the jobs goes to the collective slave queues which will 1564 * all wait for the user to signal sob 'cs_cmpl->sob_val'. 1565 */ 1566 for (i = 0 ; i < num_jobs ; i++) { 1567 if (i == 0) { 1568 queue_id = wait_queue_id; 1569 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1570 HL_COLLECTIVE_MASTER, queue_id, 1571 wait_queue_id, encaps_signal_offset); 1572 } else { 1573 if (nic_idx < NIC_NUMBER_OF_ENGINES) { 1574 if (gaudi->hw_cap_initialized & 1575 BIT(HW_CAP_NIC_SHIFT + nic_idx)) 1576 skip = false; 1577 else 1578 skip = true; 1579 1580 queue_id = nic_queue; 1581 nic_queue += 4; 1582 nic_idx++; 1583 1584 if (skip) 1585 continue; 1586 } else { 1587 queue_id = collective_queue; 1588 } 1589 1590 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1591 HL_COLLECTIVE_SLAVE, queue_id, 1592 wait_queue_id, encaps_signal_offset); 1593 } 1594 1595 if (rc) 1596 return rc; 1597 } 1598 1599 return rc; 1600 } 1601 1602 static int gaudi_late_init(struct hl_device *hdev) 1603 { 1604 struct gaudi_device *gaudi = hdev->asic_specific; 1605 int rc; 1606 1607 rc = gaudi->cpucp_info_get(hdev); 1608 if (rc) { 1609 dev_err(hdev->dev, "Failed to get cpucp info\n"); 1610 return rc; 1611 } 1612 1613 if ((hdev->card_type == cpucp_card_type_pci) && 1614 (hdev->nic_ports_mask & 0x3)) { 1615 dev_info(hdev->dev, 1616 "PCI card detected, only 8 ports are enabled\n"); 1617 hdev->nic_ports_mask &= ~0x3; 1618 1619 /* Stop and disable unused NIC QMANs */ 1620 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1621 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1622 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1623 1624 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1625 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1626 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1627 1628 WREG32(mmNIC0_QM0_GLBL_CFG0, 0); 1629 WREG32(mmNIC0_QM1_GLBL_CFG0, 0); 1630 1631 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1); 1632 } 1633 1634 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0); 1635 if (rc) { 1636 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 1637 return rc; 1638 } 1639 1640 /* Scrub both SRAM and DRAM */ 1641 rc = hdev->asic_funcs->scrub_device_mem(hdev); 1642 if (rc) 1643 goto disable_pci_access; 1644 1645 rc = gaudi_fetch_psoc_frequency(hdev); 1646 if (rc) { 1647 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 1648 goto disable_pci_access; 1649 } 1650 1651 rc = gaudi_mmu_clear_pgt_range(hdev); 1652 if (rc) { 1653 dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); 1654 goto disable_pci_access; 1655 } 1656 1657 rc = gaudi_init_tpc_mem(hdev); 1658 if (rc) { 1659 dev_err(hdev->dev, "Failed to initialize TPC memories\n"); 1660 goto disable_pci_access; 1661 } 1662 1663 rc = gaudi_collective_init(hdev); 1664 if (rc) { 1665 dev_err(hdev->dev, "Failed to init collective\n"); 1666 goto disable_pci_access; 1667 } 1668 1669 /* We only support a single ASID for the user, so for the sake of optimization, just 1670 * initialize the ASID one time during device initialization with the fixed value of 1 1671 */ 1672 gaudi_mmu_prepare(hdev, 1); 1673 1674 hl_fw_set_pll_profile(hdev); 1675 1676 return 0; 1677 1678 disable_pci_access: 1679 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 1680 1681 return rc; 1682 } 1683 1684 static void gaudi_late_fini(struct hl_device *hdev) 1685 { 1686 
	hl_hwmon_release_resources(hdev);
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bit addresses, and bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the entire allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i],
								GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
	}
}

static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ...
GAUDI_QUEUE_ID_NIC_9_3: 1779 q->pq_size = NIC_QMAN_SIZE_IN_BYTES; 1780 break; 1781 default: 1782 dev_err(hdev->dev, "Bad internal queue index %d", i); 1783 rc = -EINVAL; 1784 goto free_internal_qmans_pq_mem; 1785 } 1786 1787 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr, 1788 GFP_KERNEL | __GFP_ZERO); 1789 if (!q->pq_kernel_addr) { 1790 rc = -ENOMEM; 1791 goto free_internal_qmans_pq_mem; 1792 } 1793 } 1794 1795 return 0; 1796 1797 free_internal_qmans_pq_mem: 1798 gaudi_free_internal_qmans_pq_mem(hdev); 1799 return rc; 1800 } 1801 1802 static void gaudi_set_pci_memory_regions(struct hl_device *hdev) 1803 { 1804 struct asic_fixed_properties *prop = &hdev->asic_prop; 1805 struct pci_mem_region *region; 1806 1807 /* CFG */ 1808 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 1809 region->region_base = CFG_BASE; 1810 region->region_size = CFG_SIZE; 1811 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR; 1812 region->bar_size = CFG_BAR_SIZE; 1813 region->bar_id = CFG_BAR_ID; 1814 region->used = 1; 1815 1816 /* SRAM */ 1817 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 1818 region->region_base = SRAM_BASE_ADDR; 1819 region->region_size = SRAM_SIZE; 1820 region->offset_in_bar = 0; 1821 region->bar_size = SRAM_BAR_SIZE; 1822 region->bar_id = SRAM_BAR_ID; 1823 region->used = 1; 1824 1825 /* DRAM */ 1826 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 1827 region->region_base = DRAM_PHYS_BASE; 1828 region->region_size = hdev->asic_prop.dram_size; 1829 region->offset_in_bar = 0; 1830 region->bar_size = prop->dram_pci_bar_size; 1831 region->bar_id = HBM_BAR_ID; 1832 region->used = 1; 1833 1834 /* SP SRAM */ 1835 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM]; 1836 region->region_base = PSOC_SCRATCHPAD_ADDR; 1837 region->region_size = PSOC_SCRATCHPAD_SIZE; 1838 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR; 1839 region->bar_size = CFG_BAR_SIZE; 1840 region->bar_id = CFG_BAR_ID; 1841 region->used = 1; 1842 } 1843 1844 static int gaudi_sw_init(struct hl_device *hdev) 1845 { 1846 struct gaudi_device *gaudi; 1847 u32 i, event_id = 0; 1848 int rc; 1849 1850 /* Allocate device structure */ 1851 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 1852 if (!gaudi) 1853 return -ENOMEM; 1854 1855 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 1856 if (gaudi_irq_map_table[i].valid) { 1857 if (event_id == GAUDI_EVENT_SIZE) { 1858 dev_err(hdev->dev, 1859 "Event array exceeds the limit of %u events\n", 1860 GAUDI_EVENT_SIZE); 1861 rc = -EINVAL; 1862 goto free_gaudi_device; 1863 } 1864 1865 gaudi->events[event_id++] = 1866 gaudi_irq_map_table[i].fc_id; 1867 } 1868 } 1869 1870 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1871 1872 hdev->asic_specific = gaudi; 1873 1874 /* Create DMA pool for small allocations */ 1875 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1876 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1877 if (!hdev->dma_pool) { 1878 dev_err(hdev->dev, "failed to create DMA pool\n"); 1879 rc = -ENOMEM; 1880 goto free_gaudi_device; 1881 } 1882 1883 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1884 if (rc) 1885 goto free_dma_pool; 1886 1887 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1888 if (!hdev->cpu_accessible_dma_pool) { 1889 dev_err(hdev->dev, 1890 "Failed to create CPU accessible DMA pool\n"); 1891 rc = -ENOMEM; 1892 goto free_cpu_dma_mem; 1893 } 1894 1895 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1896 (uintptr_t) hdev->cpu_accessible_dma_mem, 1897 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1898 if 
(rc) { 1899 dev_err(hdev->dev, 1900 "Failed to add memory to CPU accessible DMA pool\n"); 1901 rc = -EFAULT; 1902 goto free_cpu_accessible_dma_pool; 1903 } 1904 1905 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1906 if (rc) 1907 goto free_cpu_accessible_dma_pool; 1908 1909 spin_lock_init(&gaudi->hw_queues_lock); 1910 1911 hdev->supports_sync_stream = true; 1912 hdev->supports_coresight = true; 1913 hdev->supports_staged_submission = true; 1914 hdev->supports_wait_for_multi_cs = true; 1915 1916 hdev->asic_funcs->set_pci_memory_regions(hdev); 1917 hdev->stream_master_qid_arr = 1918 hdev->asic_funcs->get_stream_master_qid_arr(); 1919 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; 1920 1921 return 0; 1922 1923 free_cpu_accessible_dma_pool: 1924 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1925 free_cpu_dma_mem: 1926 if (!hdev->asic_prop.fw_security_enabled) 1927 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1928 hdev->cpu_pci_msb_addr); 1929 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1930 hdev->cpu_accessible_dma_address); 1931 free_dma_pool: 1932 dma_pool_destroy(hdev->dma_pool); 1933 free_gaudi_device: 1934 kfree(gaudi); 1935 return rc; 1936 } 1937 1938 static int gaudi_sw_fini(struct hl_device *hdev) 1939 { 1940 struct gaudi_device *gaudi = hdev->asic_specific; 1941 1942 gaudi_free_internal_qmans_pq_mem(hdev); 1943 1944 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1945 1946 if (!hdev->asic_prop.fw_security_enabled) 1947 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1948 hdev->cpu_pci_msb_addr); 1949 1950 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1951 hdev->cpu_accessible_dma_address); 1952 1953 dma_pool_destroy(hdev->dma_pool); 1954 1955 kfree(gaudi); 1956 1957 return 0; 1958 } 1959 1960 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1961 { 1962 struct hl_device *hdev = arg; 1963 int i; 1964 1965 if (hdev->disabled) 1966 return IRQ_HANDLED; 1967 1968 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1969 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1970 1971 hl_irq_handler_eq(irq, &hdev->event_queue); 1972 1973 return IRQ_HANDLED; 1974 } 1975 1976 /* 1977 * For backward compatibility, new MSI interrupts should be set after the 1978 * existing CPU and NIC interrupts. 1979 */ 1980 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1981 bool cpu_eq) 1982 { 1983 int msi_vec; 1984 1985 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1986 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1987 GAUDI_EVENT_QUEUE_MSI_IDX); 1988 1989 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? 
nr : 1990 (nr + NIC_NUMBER_OF_ENGINES + 1); 1991 1992 return pci_irq_vector(hdev->pdev, msi_vec); 1993 } 1994 1995 static int gaudi_enable_msi_single(struct hl_device *hdev) 1996 { 1997 int rc, irq; 1998 1999 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n"); 2000 2001 irq = gaudi_pci_irq_vector(hdev, 0, false); 2002 rc = request_irq(irq, gaudi_irq_handler_single, 0, 2003 "gaudi single msi", hdev); 2004 if (rc) 2005 dev_err(hdev->dev, 2006 "Failed to request single MSI IRQ\n"); 2007 2008 return rc; 2009 } 2010 2011 static int gaudi_enable_msi_multi(struct hl_device *hdev) 2012 { 2013 int cq_cnt = hdev->asic_prop.completion_queues_count; 2014 int rc, i, irq_cnt_init, irq; 2015 2016 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) { 2017 irq = gaudi_pci_irq_vector(hdev, i, false); 2018 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i], 2019 &hdev->completion_queue[i]); 2020 if (rc) { 2021 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 2022 goto free_irqs; 2023 } 2024 } 2025 2026 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true); 2027 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt], 2028 &hdev->event_queue); 2029 if (rc) { 2030 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 2031 goto free_irqs; 2032 } 2033 2034 return 0; 2035 2036 free_irqs: 2037 for (i = 0 ; i < irq_cnt_init ; i++) 2038 free_irq(gaudi_pci_irq_vector(hdev, i, false), 2039 &hdev->completion_queue[i]); 2040 return rc; 2041 } 2042 2043 static int gaudi_enable_msi(struct hl_device *hdev) 2044 { 2045 struct gaudi_device *gaudi = hdev->asic_specific; 2046 int rc; 2047 2048 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 2049 return 0; 2050 2051 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); 2052 if (rc < 0) { 2053 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 2054 return rc; 2055 } 2056 2057 if (rc < NUMBER_OF_INTERRUPTS) { 2058 gaudi->multi_msi_mode = false; 2059 rc = gaudi_enable_msi_single(hdev); 2060 } else { 2061 gaudi->multi_msi_mode = true; 2062 rc = gaudi_enable_msi_multi(hdev); 2063 } 2064 2065 if (rc) 2066 goto free_pci_irq_vectors; 2067 2068 gaudi->hw_cap_initialized |= HW_CAP_MSI; 2069 2070 return 0; 2071 2072 free_pci_irq_vectors: 2073 pci_free_irq_vectors(hdev->pdev); 2074 return rc; 2075 } 2076 2077 static void gaudi_sync_irqs(struct hl_device *hdev) 2078 { 2079 struct gaudi_device *gaudi = hdev->asic_specific; 2080 int i, cq_cnt = hdev->asic_prop.completion_queues_count; 2081 2082 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2083 return; 2084 2085 /* Wait for all pending IRQs to be finished */ 2086 if (gaudi->multi_msi_mode) { 2087 for (i = 0 ; i < cq_cnt ; i++) 2088 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false)); 2089 2090 synchronize_irq(gaudi_pci_irq_vector(hdev, 2091 GAUDI_EVENT_QUEUE_MSI_IDX, 2092 true)); 2093 } else { 2094 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 2095 } 2096 } 2097 2098 static void gaudi_disable_msi(struct hl_device *hdev) 2099 { 2100 struct gaudi_device *gaudi = hdev->asic_specific; 2101 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count; 2102 2103 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2104 return; 2105 2106 gaudi_sync_irqs(hdev); 2107 2108 if (gaudi->multi_msi_mode) { 2109 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, 2110 true); 2111 free_irq(irq, &hdev->event_queue); 2112 2113 for (i = 0 ; i < cq_cnt ; i++) { 2114 irq = gaudi_pci_irq_vector(hdev, i, false); 2115 free_irq(irq, &hdev->completion_queue[i]); 2116 } 2117 } else { 2118 
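		/* Single MSI mode: the lone IRQ was requested in
		 * gaudi_enable_msi_single() with hdev as the cookie, so it
		 * must be freed with that same argument.
		 */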
free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 2119 } 2120 2121 pci_free_irq_vectors(hdev->pdev); 2122 2123 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 2124 } 2125 2126 static void gaudi_init_scrambler_sram(struct hl_device *hdev) 2127 { 2128 struct gaudi_device *gaudi = hdev->asic_specific; 2129 2130 if (hdev->asic_prop.fw_security_enabled) 2131 return; 2132 2133 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 2134 CPU_BOOT_DEV_STS0_SRAM_SCR_EN) 2135 return; 2136 2137 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 2138 return; 2139 2140 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2141 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2142 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2143 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2144 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2145 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2146 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2147 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2148 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2149 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2150 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2151 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2152 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2153 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2154 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2155 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2156 2157 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2158 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2159 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2160 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2161 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2162 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2163 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2164 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2165 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2166 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2167 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2168 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2169 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2170 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2171 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2172 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2173 2174 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 2175 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2176 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 2177 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2178 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 2179 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2180 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 2181 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2182 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 2183 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2184 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 2185 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2186 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 2187 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2188 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 2189 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2190 2191 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 2192 } 2193 2194 static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 2195 { 2196 struct gaudi_device *gaudi = hdev->asic_specific; 2197 2198 if (hdev->asic_prop.fw_security_enabled) 2199 return; 2200 2201 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2202 CPU_BOOT_DEV_STS0_DRAM_SCR_EN) 2203 return; 2204 2205 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 2206 return; 2207 2208 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 2209 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2210 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 2211 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2212 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 2213 1 << 
IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2214 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 2215 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2216 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 2217 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2218 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 2219 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2220 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 2221 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2222 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 2223 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2224 2225 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 2226 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2227 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 2228 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2229 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 2230 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2231 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 2232 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2233 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 2234 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2235 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 2236 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2237 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 2238 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2239 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 2240 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2241 2242 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 2243 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2244 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 2245 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2246 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 2247 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2248 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 2249 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2250 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 2251 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2252 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 2253 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2254 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 2255 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2256 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 2257 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2258 2259 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 2260 } 2261 2262 static void gaudi_init_e2e(struct hl_device *hdev) 2263 { 2264 if (hdev->asic_prop.fw_security_enabled) 2265 return; 2266 2267 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2268 CPU_BOOT_DEV_STS0_E2E_CRED_EN) 2269 return; 2270 2271 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 2272 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 2273 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 2274 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 2275 2276 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2277 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2278 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2279 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2280 2281 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2282 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2283 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2284 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2285 2286 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2287 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2288 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2289 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2290 2291 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2292 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2293 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2294 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2295 2296 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2297 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2298 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 
1); 2299 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2300 2301 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2302 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2303 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2304 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2305 2306 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 2307 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 2308 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 2309 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 2310 2311 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 2312 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 2313 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 2314 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 2315 2316 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2317 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2318 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2319 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2320 2321 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2322 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2323 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2324 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2325 2326 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2327 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2328 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2329 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2330 2331 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2332 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2333 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2334 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2335 2336 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2337 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2338 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2339 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2340 2341 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2342 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2343 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2344 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2345 2346 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 2347 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 2348 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 2349 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 2350 2351 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2352 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2353 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2354 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2355 2356 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2357 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2358 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2359 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2360 2361 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2362 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2363 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2364 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2365 2366 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2367 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2368 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2369 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2370 2371 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2372 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2373 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2374 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2375 2376 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2377 
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2378 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2379 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2380 2381 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2382 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2383 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2384 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2385 2386 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2387 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2388 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2389 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2390 2391 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 2392 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2393 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 2394 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2395 2396 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 2397 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2398 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 2399 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2400 2401 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 2402 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2403 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 2404 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2405 2406 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 2407 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2408 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 2409 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2410 2411 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 2412 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2413 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 2414 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2415 2416 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 2417 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2418 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 2419 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2420 2421 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 2422 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2423 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 2424 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2425 2426 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 2427 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2428 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 2429 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2430 2431 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 2432 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2433 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 2434 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2435 2436 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 2437 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2438 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 2439 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2440 2441 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 2442 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2443 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 2444 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2445 2446 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 2447 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2448 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 2449 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2450 2451 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 2452 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2453 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 2454 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2455 2456 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 2457 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2458 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 2459 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2460 2461 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 2462 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2463 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 2464 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2465 2466 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 2467 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2468 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 2469 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2470 2471 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 2472 1 << 
DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2473 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 2474 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2475 2476 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 2477 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2478 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 2479 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2480 2481 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 2482 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2483 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 2484 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2485 2486 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 2487 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2488 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 2489 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2490 2491 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 2492 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2493 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 2494 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2495 2496 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 2497 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2498 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 2499 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2500 2501 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, 2502 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2503 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, 2504 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2505 2506 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, 2507 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2508 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, 2509 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2510 } 2511 2512 static void gaudi_init_hbm_cred(struct hl_device *hdev) 2513 { 2514 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; 2515 2516 if (hdev->asic_prop.fw_security_enabled) 2517 return; 2518 2519 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2520 CPU_BOOT_DEV_STS0_HBM_CRED_EN) 2521 return; 2522 2523 hbm0_wr = 0x33333333; 2524 hbm0_rd = 0x77777777; 2525 hbm1_wr = 0x55555555; 2526 hbm1_rd = 0xDDDDDDDD; 2527 2528 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); 2529 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); 2530 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); 2531 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); 2532 2533 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); 2534 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); 2535 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); 2536 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); 2537 2538 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); 2539 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); 2540 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); 2541 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); 2542 2543 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); 2544 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); 2545 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); 2546 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); 2547 2548 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, 2549 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2550 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2551 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, 2552 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2553 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2554 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, 2555 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2556 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2557 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, 2558 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2559 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2560 2561 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, 2562 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2563 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2564 
WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, 2565 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2566 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2567 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, 2568 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2569 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2570 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, 2571 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2572 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2573 } 2574 2575 static void gaudi_init_golden_registers(struct hl_device *hdev) 2576 { 2577 u32 tpc_offset; 2578 int tpc_id, i; 2579 2580 gaudi_init_e2e(hdev); 2581 gaudi_init_hbm_cred(hdev); 2582 2583 for (tpc_id = 0, tpc_offset = 0; 2584 tpc_id < TPC_NUMBER_OF_ENGINES; 2585 tpc_id++, tpc_offset += TPC_CFG_OFFSET) { 2586 /* Mask all arithmetic interrupts from TPC */ 2587 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); 2588 /* Set 16 cache lines */ 2589 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, 2590 ICACHE_FETCH_LINE_NUM, 2); 2591 } 2592 2593 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ 2594 for (i = 0 ; i < 128 ; i += 8) 2595 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); 2596 2597 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2598 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2599 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2600 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2601 } 2602 2603 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 2604 int qman_id, dma_addr_t qman_pq_addr) 2605 { 2606 struct cpu_dyn_regs *dyn_regs = 2607 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2608 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2609 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2610 u32 q_off, dma_qm_offset; 2611 u32 dma_qm_err_cfg, irq_handler_offset; 2612 2613 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2614 2615 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2616 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2617 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2618 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2619 so_base_en_lo = lower_32_bits(CFG_BASE + 2620 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2621 so_base_en_hi = upper_32_bits(CFG_BASE + 2622 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2623 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2624 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2625 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2626 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2627 so_base_ws_lo = lower_32_bits(CFG_BASE + 2628 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2629 so_base_ws_hi = upper_32_bits(CFG_BASE + 2630 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2631 2632 q_off = dma_qm_offset + qman_id * 4; 2633 2634 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 2635 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 2636 2637 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 2638 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2639 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2640 2641 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 2642 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2643 QMAN_LDMA_SRC_OFFSET); 2644 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2645 QMAN_LDMA_DST_OFFSET); 2646 2647 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2648 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2649 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2650 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2651 
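	/* MSG_BASE2/3 point at the W_S sync manager monitor payload and SOB
	 * objects (the mtr_base_ws/so_base_ws values computed above),
	 * mirroring the E_N addresses programmed into MSG_BASE0/1.
	 */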
WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 2652 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 2653 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 2654 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 2655 2656 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 2657 2658 /* The following configuration is needed only once per QMAN */ 2659 if (qman_id == 0) { 2660 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2661 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2662 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2663 2664 /* Configure RAZWI IRQ */ 2665 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2666 if (hdev->stop_on_err) 2667 dma_qm_err_cfg |= 2668 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2669 2670 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2671 2672 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2673 lower_32_bits(CFG_BASE + irq_handler_offset)); 2674 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2675 upper_32_bits(CFG_BASE + irq_handler_offset)); 2676 2677 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2678 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2679 dma_id); 2680 2681 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2682 QM_ARB_ERR_MSG_EN_MASK); 2683 2684 /* Set timeout to maximum */ 2685 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2686 2687 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2688 QMAN_EXTERNAL_MAKE_TRUSTED); 2689 2690 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2691 } 2692 } 2693 2694 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) 2695 { 2696 struct cpu_dyn_regs *dyn_regs = 2697 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2698 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; 2699 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 2700 u32 irq_handler_offset; 2701 2702 /* Set to maximum possible according to physical size */ 2703 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); 2704 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); 2705 2706 /* WA for H/W bug H3-2116 */ 2707 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15); 2708 2709 /* STOP_ON bit implies no completion to operation in case of RAZWI */ 2710 if (hdev->stop_on_err) 2711 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; 2712 2713 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); 2714 2715 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2716 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2717 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 2718 2719 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, 2720 lower_32_bits(CFG_BASE + irq_handler_offset)); 2721 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, 2722 upper_32_bits(CFG_BASE + irq_handler_offset)); 2723 2724 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, 2725 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); 2726 WREG32(mmDMA0_CORE_PROT + dma_offset, 2727 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); 2728 /* If the channel is secured, it should be in MMU bypass mode */ 2729 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, 2730 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); 2731 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); 2732 } 2733 2734 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, 2735 u32 enable_mask) 2736 { 2737 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2738 2739 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); 2740 } 2741 2742 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) 2743 { 2744 struct gaudi_device *gaudi = hdev->asic_specific; 2745 struct hl_hw_queue *q; 2746 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; 2747 2748 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) 2749 return; 2750 2751 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2752 dma_id = gaudi_dma_assignment[i]; 2753 /* 2754 * For queues after the CPU Q need to add 1 to get the correct 2755 * queue. In addition, need to add the CPU EQ and NIC IRQs in 2756 * order to get the correct MSI register. 2757 */ 2758 if (dma_id > 1) { 2759 cpu_skip = 1; 2760 nic_skip = NIC_NUMBER_OF_ENGINES; 2761 } else { 2762 cpu_skip = 0; 2763 nic_skip = 0; 2764 } 2765 2766 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2767 q_idx = 4 * dma_id + j + cpu_skip; 2768 q = &hdev->kernel_queues[q_idx]; 2769 q->cq_id = cq_id++; 2770 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 2771 gaudi_init_pci_dma_qman(hdev, dma_id, j, 2772 q->bus_address); 2773 } 2774 2775 gaudi_init_dma_core(hdev, dma_id); 2776 2777 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 2778 } 2779 2780 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2781 } 2782 2783 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2784 int qman_id, u64 qman_base_addr) 2785 { 2786 struct cpu_dyn_regs *dyn_regs = 2787 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2788 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2789 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2790 u32 dma_qm_err_cfg, irq_handler_offset; 2791 u32 q_off, dma_qm_offset; 2792 2793 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2794 2795 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2796 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2797 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2798 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2799 so_base_en_lo = lower_32_bits(CFG_BASE + 2800 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2801 so_base_en_hi = upper_32_bits(CFG_BASE + 2802 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2803 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2804 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2805 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2806 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2807 so_base_ws_lo = lower_32_bits(CFG_BASE + 2808 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2809 so_base_ws_hi = upper_32_bits(CFG_BASE + 2810 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2811 2812 q_off = dma_qm_offset + qman_id * 4; 2813 2814 if (qman_id < 4) { 2815 
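		/* qman_id 0-3 are the upper CPs (one per stream): point them at
		 * the PQ that was allocated for this internal queue in host
		 * memory. qman_id 4 is the lower CP and is configured in the
		 * else branch below.
		 */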
WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, 2816 lower_32_bits(qman_base_addr)); 2817 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, 2818 upper_32_bits(qman_base_addr)); 2819 2820 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); 2821 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2822 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2823 2824 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2825 QMAN_CPDMA_SIZE_OFFSET); 2826 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2827 QMAN_CPDMA_SRC_OFFSET); 2828 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2829 QMAN_CPDMA_DST_OFFSET); 2830 } else { 2831 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2832 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2833 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2834 2835 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2836 QMAN_LDMA_SIZE_OFFSET); 2837 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2838 QMAN_LDMA_SRC_OFFSET); 2839 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2840 QMAN_LDMA_DST_OFFSET); 2841 2842 /* Configure RAZWI IRQ */ 2843 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2844 if (hdev->stop_on_err) 2845 dma_qm_err_cfg |= 2846 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2847 2848 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2849 2850 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2851 lower_32_bits(CFG_BASE + irq_handler_offset)); 2852 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2853 upper_32_bits(CFG_BASE + irq_handler_offset)); 2854 2855 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2856 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2857 dma_id); 2858 2859 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2860 QM_ARB_ERR_MSG_EN_MASK); 2861 2862 /* Set timeout to maximum */ 2863 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2864 2865 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2866 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2867 QMAN_INTERNAL_MAKE_TRUSTED); 2868 } 2869 2870 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2871 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2872 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2873 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2874 2875 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */ 2876 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) { 2877 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 2878 mtr_base_ws_lo); 2879 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 2880 mtr_base_ws_hi); 2881 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 2882 so_base_ws_lo); 2883 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 2884 so_base_ws_hi); 2885 } 2886 } 2887 2888 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) 2889 { 2890 struct gaudi_device *gaudi = hdev->asic_specific; 2891 struct gaudi_internal_qman_info *q; 2892 u64 qman_base_addr; 2893 int i, j, dma_id, internal_q_index; 2894 2895 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) 2896 return; 2897 2898 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 2899 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; 2900 2901 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2902 /* 2903 * Add the CPU queue in order to get the correct queue 2904 * number as all internal queue are placed after it 2905 */ 2906 internal_q_index = dma_id * QMAN_STREAMS + j + 1; 2907 2908 q = &gaudi->internal_qmans[internal_q_index]; 2909 qman_base_addr = (u64) q->pq_dma_addr; 2910 gaudi_init_hbm_dma_qman(hdev, dma_id, j, 
2911 qman_base_addr); 2912 } 2913 2914 /* Initializing lower CP for HBM DMA QMAN */ 2915 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); 2916 2917 gaudi_init_dma_core(hdev, dma_id); 2918 2919 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); 2920 } 2921 2922 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; 2923 } 2924 2925 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, 2926 int qman_id, u64 qman_base_addr) 2927 { 2928 struct cpu_dyn_regs *dyn_regs = 2929 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2930 u32 mtr_base_lo, mtr_base_hi; 2931 u32 so_base_lo, so_base_hi; 2932 u32 irq_handler_offset; 2933 u32 q_off, mme_id; 2934 u32 mme_qm_err_cfg; 2935 2936 mtr_base_lo = lower_32_bits(CFG_BASE + 2937 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2938 mtr_base_hi = upper_32_bits(CFG_BASE + 2939 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2940 so_base_lo = lower_32_bits(CFG_BASE + 2941 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2942 so_base_hi = upper_32_bits(CFG_BASE + 2943 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2944 2945 q_off = mme_offset + qman_id * 4; 2946 2947 if (qman_id < 4) { 2948 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, 2949 lower_32_bits(qman_base_addr)); 2950 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, 2951 upper_32_bits(qman_base_addr)); 2952 2953 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); 2954 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); 2955 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); 2956 2957 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2958 QMAN_CPDMA_SIZE_OFFSET); 2959 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2960 QMAN_CPDMA_SRC_OFFSET); 2961 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2962 QMAN_CPDMA_DST_OFFSET); 2963 } else { 2964 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2965 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2966 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 2967 2968 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2969 QMAN_LDMA_SIZE_OFFSET); 2970 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2971 QMAN_LDMA_SRC_OFFSET); 2972 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2973 QMAN_LDMA_DST_OFFSET); 2974 2975 /* Configure RAZWI IRQ */ 2976 mme_id = mme_offset / 2977 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2; 2978 2979 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2980 if (hdev->stop_on_err) 2981 mme_qm_err_cfg |= 2982 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2983 2984 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); 2985 2986 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, 2987 lower_32_bits(CFG_BASE + irq_handler_offset)); 2988 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, 2989 upper_32_bits(CFG_BASE + irq_handler_offset)); 2990 2991 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, 2992 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + 2993 mme_id); 2994 2995 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, 2996 QM_ARB_ERR_MSG_EN_MASK); 2997 2998 /* Set timeout to maximum */ 2999 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); 3000 3001 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); 3002 WREG32(mmMME0_QM_GLBL_PROT + mme_offset, 3003 QMAN_INTERNAL_MAKE_TRUSTED); 3004 } 3005 3006 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 3007 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 3008 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 3009 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 3010 } 3011 3012 static void gaudi_init_mme_qmans(struct hl_device *hdev) 3013 { 3014 struct gaudi_device *gaudi = hdev->asic_specific; 3015 struct gaudi_internal_qman_info *q; 3016 u64 qman_base_addr; 3017 u32 mme_offset; 3018 int i, internal_q_index; 3019 3020 if (gaudi->hw_cap_initialized & HW_CAP_MME) 3021 return; 3022 3023 /* 3024 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) 3025 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) 3026 */ 3027 3028 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 3029 3030 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { 3031 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; 3032 q = &gaudi->internal_qmans[internal_q_index]; 3033 qman_base_addr = (u64) q->pq_dma_addr; 3034 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), 3035 qman_base_addr); 3036 if (i == 3) 3037 mme_offset = 0; 3038 } 3039 3040 /* Initializing lower CP for MME QMANs */ 3041 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 3042 gaudi_init_mme_qman(hdev, mme_offset, 4, 0); 3043 gaudi_init_mme_qman(hdev, 0, 4, 0); 3044 3045 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); 3046 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); 3047 3048 gaudi->hw_cap_initialized |= HW_CAP_MME; 3049 } 3050 3051 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, 3052 int qman_id, u64 qman_base_addr) 3053 { 3054 struct cpu_dyn_regs *dyn_regs = 3055 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3056 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3057 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3058 u32 tpc_qm_err_cfg, irq_handler_offset; 3059 u32 q_off, tpc_id; 3060 3061 mtr_base_en_lo = lower_32_bits(CFG_BASE + 3062 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3063 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3064 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3065 
so_base_en_lo = lower_32_bits(CFG_BASE + 3066 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3067 so_base_en_hi = upper_32_bits(CFG_BASE + 3068 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3069 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 3070 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3071 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3072 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3073 so_base_ws_lo = lower_32_bits(CFG_BASE + 3074 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3075 so_base_ws_hi = upper_32_bits(CFG_BASE + 3076 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3077 3078 q_off = tpc_offset + qman_id * 4; 3079 3080 tpc_id = tpc_offset / 3081 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); 3082 3083 if (qman_id < 4) { 3084 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, 3085 lower_32_bits(qman_base_addr)); 3086 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, 3087 upper_32_bits(qman_base_addr)); 3088 3089 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); 3090 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); 3091 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); 3092 3093 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3094 QMAN_CPDMA_SIZE_OFFSET); 3095 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3096 QMAN_CPDMA_SRC_OFFSET); 3097 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3098 QMAN_CPDMA_DST_OFFSET); 3099 } else { 3100 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 3101 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3102 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 3103 3104 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3105 QMAN_LDMA_SIZE_OFFSET); 3106 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3107 QMAN_LDMA_SRC_OFFSET); 3108 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3109 QMAN_LDMA_DST_OFFSET); 3110 3111 /* Configure RAZWI IRQ */ 3112 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3113 if (hdev->stop_on_err) 3114 tpc_qm_err_cfg |= 3115 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3116 3117 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); 3118 3119 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, 3120 lower_32_bits(CFG_BASE + irq_handler_offset)); 3121 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, 3122 upper_32_bits(CFG_BASE + irq_handler_offset)); 3123 3124 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, 3125 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + 3126 tpc_id); 3127 3128 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, 3129 QM_ARB_ERR_MSG_EN_MASK); 3130 3131 /* Set timeout to maximum */ 3132 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); 3133 3134 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); 3135 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, 3136 QMAN_INTERNAL_MAKE_TRUSTED); 3137 } 3138 3139 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3140 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3141 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3142 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3143 3144 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */ 3145 if (tpc_id == 6) { 3146 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 3147 mtr_base_ws_lo); 3148 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 3149 mtr_base_ws_hi); 3150 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 3151 so_base_ws_lo); 3152 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 3153 so_base_ws_hi); 3154 } 3155 } 3156 3157 static void gaudi_init_tpc_qmans(struct hl_device *hdev) 3158 { 3159 struct gaudi_device *gaudi = hdev->asic_specific; 3160 struct 
gaudi_internal_qman_info *q; 3161 u64 qman_base_addr; 3162 u32 so_base_hi, tpc_offset = 0; 3163 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - 3164 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; 3165 int i, tpc_id, internal_q_index; 3166 3167 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) 3168 return; 3169 3170 so_base_hi = upper_32_bits(CFG_BASE + 3171 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3172 3173 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3174 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3175 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + 3176 tpc_id * QMAN_STREAMS + i; 3177 q = &gaudi->internal_qmans[internal_q_index]; 3178 qman_base_addr = (u64) q->pq_dma_addr; 3179 gaudi_init_tpc_qman(hdev, tpc_offset, i, 3180 qman_base_addr); 3181 3182 if (i == 3) { 3183 /* Initializing lower CP for TPC QMAN */ 3184 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); 3185 3186 /* Enable the QMAN and TPC channel */ 3187 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 3188 QMAN_TPC_ENABLE); 3189 } 3190 } 3191 3192 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, 3193 so_base_hi); 3194 3195 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3196 3197 gaudi->hw_cap_initialized |= 3198 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id); 3199 } 3200 } 3201 3202 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, 3203 int qman_id, u64 qman_base_addr, int nic_id) 3204 { 3205 struct cpu_dyn_regs *dyn_regs = 3206 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3207 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3208 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3209 u32 nic_qm_err_cfg, irq_handler_offset; 3210 u32 q_off; 3211 3212 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3213 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3214 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3215 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3216 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3217 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3218 so_base_en_hi = upper_32_bits(CFG_BASE + 3219 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3220 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3221 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3222 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3223 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3224 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3225 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3226 so_base_ws_hi = upper_32_bits(CFG_BASE + 3227 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3228 3229 q_off = nic_offset + qman_id * 4; 3230 3231 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr)); 3232 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr)); 3233 3234 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH)); 3235 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0); 3236 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0); 3237 3238 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3239 QMAN_LDMA_SIZE_OFFSET); 3240 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3241 QMAN_LDMA_SRC_OFFSET); 3242 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3243 QMAN_LDMA_DST_OFFSET); 3244 3245 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3246 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3247 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3248 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3249 3250 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */ 3251 
WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 3252 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 3253 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 3254 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 3255 3256 if (qman_id == 0) { 3257 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 3258 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3259 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 3260 3261 /* Configure RAZWI IRQ */ 3262 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3263 if (hdev->stop_on_err) 3264 nic_qm_err_cfg |= 3265 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3266 3267 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg); 3268 3269 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset, 3270 lower_32_bits(CFG_BASE + irq_handler_offset)); 3271 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset, 3272 upper_32_bits(CFG_BASE + irq_handler_offset)); 3273 3274 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset, 3275 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id + 3276 nic_id); 3277 3278 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, 3279 QM_ARB_ERR_MSG_EN_MASK); 3280 3281 /* Set timeout to maximum */ 3282 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); 3283 3284 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); 3285 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, 3286 QMAN_INTERNAL_MAKE_TRUSTED); 3287 } 3288 } 3289 3290 static void gaudi_init_nic_qmans(struct hl_device *hdev) 3291 { 3292 struct gaudi_device *gaudi = hdev->asic_specific; 3293 struct gaudi_internal_qman_info *q; 3294 u64 qman_base_addr; 3295 u32 nic_offset = 0; 3296 u32 nic_delta_between_qmans = 3297 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3298 u32 nic_delta_between_nics = 3299 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3300 int i, nic_id, internal_q_index; 3301 3302 if (!hdev->nic_ports_mask) 3303 return; 3304 3305 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK) 3306 return; 3307 3308 dev_dbg(hdev->dev, "Initializing NIC QMANs\n"); 3309 3310 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3311 if (!(hdev->nic_ports_mask & (1 << nic_id))) { 3312 nic_offset += nic_delta_between_qmans; 3313 if (nic_id & 1) { 3314 nic_offset -= (nic_delta_between_qmans * 2); 3315 nic_offset += nic_delta_between_nics; 3316 } 3317 continue; 3318 } 3319 3320 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3321 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 + 3322 nic_id * QMAN_STREAMS + i; 3323 q = &gaudi->internal_qmans[internal_q_index]; 3324 qman_base_addr = (u64) q->pq_dma_addr; 3325 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3), 3326 qman_base_addr, nic_id); 3327 } 3328 3329 /* Enable the QMAN */ 3330 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE); 3331 3332 nic_offset += nic_delta_between_qmans; 3333 if (nic_id & 1) { 3334 nic_offset -= (nic_delta_between_qmans * 2); 3335 nic_offset += nic_delta_between_nics; 3336 } 3337 3338 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id); 3339 } 3340 } 3341 3342 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) 3343 { 3344 struct gaudi_device *gaudi = hdev->asic_specific; 3345 3346 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3347 return; 3348 3349 WREG32(mmDMA0_QM_GLBL_CFG0, 0); 3350 WREG32(mmDMA1_QM_GLBL_CFG0, 0); 3351 WREG32(mmDMA5_QM_GLBL_CFG0, 0); 3352 } 3353 3354 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) 3355 { 3356 struct gaudi_device *gaudi = hdev->asic_specific; 3357 3358 if (!(gaudi->hw_cap_initialized & 
HW_CAP_HBM_DMA)) 3359 return; 3360 3361 WREG32(mmDMA2_QM_GLBL_CFG0, 0); 3362 WREG32(mmDMA3_QM_GLBL_CFG0, 0); 3363 WREG32(mmDMA4_QM_GLBL_CFG0, 0); 3364 WREG32(mmDMA6_QM_GLBL_CFG0, 0); 3365 WREG32(mmDMA7_QM_GLBL_CFG0, 0); 3366 } 3367 3368 static void gaudi_disable_mme_qmans(struct hl_device *hdev) 3369 { 3370 struct gaudi_device *gaudi = hdev->asic_specific; 3371 3372 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3373 return; 3374 3375 WREG32(mmMME2_QM_GLBL_CFG0, 0); 3376 WREG32(mmMME0_QM_GLBL_CFG0, 0); 3377 } 3378 3379 static void gaudi_disable_tpc_qmans(struct hl_device *hdev) 3380 { 3381 struct gaudi_device *gaudi = hdev->asic_specific; 3382 u32 tpc_offset = 0; 3383 int tpc_id; 3384 3385 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3386 return; 3387 3388 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3389 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); 3390 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3391 } 3392 } 3393 3394 static void gaudi_disable_nic_qmans(struct hl_device *hdev) 3395 { 3396 struct gaudi_device *gaudi = hdev->asic_specific; 3397 u32 nic_mask, nic_offset = 0; 3398 u32 nic_delta_between_qmans = 3399 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3400 u32 nic_delta_between_nics = 3401 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3402 int nic_id; 3403 3404 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3405 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id); 3406 3407 if (gaudi->hw_cap_initialized & nic_mask) 3408 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0); 3409 3410 nic_offset += nic_delta_between_qmans; 3411 if (nic_id & 1) { 3412 nic_offset -= (nic_delta_between_qmans * 2); 3413 nic_offset += nic_delta_between_nics; 3414 } 3415 } 3416 } 3417 3418 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) 3419 { 3420 struct gaudi_device *gaudi = hdev->asic_specific; 3421 3422 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3423 return; 3424 3425 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ 3426 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3427 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3428 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3429 } 3430 3431 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) 3432 { 3433 struct gaudi_device *gaudi = hdev->asic_specific; 3434 3435 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3436 return; 3437 3438 /* Stop CPs of HBM DMA QMANs */ 3439 3440 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3441 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3442 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3443 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3444 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3445 } 3446 3447 static void gaudi_stop_mme_qmans(struct hl_device *hdev) 3448 { 3449 struct gaudi_device *gaudi = hdev->asic_specific; 3450 3451 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3452 return; 3453 3454 /* Stop CPs of MME QMANs */ 3455 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3456 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3457 } 3458 3459 static void gaudi_stop_tpc_qmans(struct hl_device *hdev) 3460 { 3461 struct gaudi_device *gaudi = hdev->asic_specific; 3462 3463 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3464 return; 3465 3466 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3467 
WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3468 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3469 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3470 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3471 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3472 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3473 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3474 } 3475 3476 static void gaudi_stop_nic_qmans(struct hl_device *hdev) 3477 { 3478 struct gaudi_device *gaudi = hdev->asic_specific; 3479 3480 /* Stop upper CPs of QMANs */ 3481 3482 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) 3483 WREG32(mmNIC0_QM0_GLBL_CFG1, 3484 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3485 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3486 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3487 3488 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) 3489 WREG32(mmNIC0_QM1_GLBL_CFG1, 3490 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3491 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3492 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3493 3494 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) 3495 WREG32(mmNIC1_QM0_GLBL_CFG1, 3496 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3497 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3498 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3499 3500 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) 3501 WREG32(mmNIC1_QM1_GLBL_CFG1, 3502 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3503 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3504 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3505 3506 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) 3507 WREG32(mmNIC2_QM0_GLBL_CFG1, 3508 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3509 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3510 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3511 3512 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) 3513 WREG32(mmNIC2_QM1_GLBL_CFG1, 3514 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3515 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3516 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3517 3518 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) 3519 WREG32(mmNIC3_QM0_GLBL_CFG1, 3520 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3521 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3522 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3523 3524 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) 3525 WREG32(mmNIC3_QM1_GLBL_CFG1, 3526 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3527 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3528 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3529 3530 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) 3531 WREG32(mmNIC4_QM0_GLBL_CFG1, 3532 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3533 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3534 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3535 3536 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) 3537 WREG32(mmNIC4_QM1_GLBL_CFG1, 3538 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3539 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3540 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3541 } 3542 3543 static void gaudi_pci_dma_stall(struct hl_device *hdev) 3544 { 3545 struct gaudi_device *gaudi = hdev->asic_specific; 3546 3547 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3548 return; 3549 3550 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3551 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3552 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3553 } 3554 3555 static void gaudi_hbm_dma_stall(struct hl_device *hdev) 3556 { 3557 struct gaudi_device *gaudi = hdev->asic_specific; 3558 3559 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3560 return; 3561 3562 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3563 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3564 
WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3565 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3566 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3567 } 3568 3569 static void gaudi_mme_stall(struct hl_device *hdev) 3570 { 3571 struct gaudi_device *gaudi = hdev->asic_specific; 3572 3573 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3574 return; 3575 3576 /* WA for H3-1800 bug: do ACC and SBAB writes twice */ 3577 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3578 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3579 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3580 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3581 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3582 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3583 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3584 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3585 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3586 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3587 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3588 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3589 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3590 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3591 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3592 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3593 } 3594 3595 static void gaudi_tpc_stall(struct hl_device *hdev) 3596 { 3597 struct gaudi_device *gaudi = hdev->asic_specific; 3598 3599 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3600 return; 3601 3602 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3603 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3604 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3605 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3606 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3607 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3608 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3609 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3610 } 3611 3612 static void gaudi_disable_clock_gating(struct hl_device *hdev) 3613 { 3614 u32 qman_offset; 3615 int i; 3616 3617 if (hdev->asic_prop.fw_security_enabled) 3618 return; 3619 3620 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 3621 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); 3622 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); 3623 3624 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); 3625 } 3626 3627 WREG32(mmMME0_QM_CGM_CFG, 0); 3628 WREG32(mmMME0_QM_CGM_CFG1, 0); 3629 WREG32(mmMME2_QM_CGM_CFG, 0); 3630 WREG32(mmMME2_QM_CGM_CFG1, 0); 3631 3632 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 3633 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); 3634 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); 3635 3636 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); 3637 } 3638 } 3639 3640 static void gaudi_enable_timestamp(struct hl_device *hdev) 3641 { 3642 /* Disable the timestamp counter */ 3643 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3644 3645 /* Zero the lower/upper parts of the 64-bit counter */ 3646 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); 3647 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); 3648 3649 /* Enable the counter */ 3650 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); 3651 } 
}
3652 3653 static void gaudi_disable_timestamp(struct hl_device *hdev) 3654 { 3655 /* Disable the timestamp counter */ 3656 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3657 } 3658 3659 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3660 { 3661 u32 wait_timeout_ms; 3662 3663 if (hdev->pldm) 3664 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 3665 else 3666 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; 3667 3668 if (fw_reset) 3669 goto skip_engines; 3670 3671 gaudi_stop_nic_qmans(hdev); 3672 gaudi_stop_mme_qmans(hdev); 3673 gaudi_stop_tpc_qmans(hdev); 3674 gaudi_stop_hbm_dma_qmans(hdev); 3675 gaudi_stop_pci_dma_qmans(hdev); 3676 3677 msleep(wait_timeout_ms); 3678 3679 gaudi_pci_dma_stall(hdev); 3680 gaudi_hbm_dma_stall(hdev); 3681 gaudi_tpc_stall(hdev); 3682 gaudi_mme_stall(hdev); 3683 3684 msleep(wait_timeout_ms); 3685 3686 gaudi_disable_nic_qmans(hdev); 3687 gaudi_disable_mme_qmans(hdev); 3688 gaudi_disable_tpc_qmans(hdev); 3689 gaudi_disable_hbm_dma_qmans(hdev); 3690 gaudi_disable_pci_dma_qmans(hdev); 3691 3692 gaudi_disable_timestamp(hdev); 3693 3694 skip_engines: 3695 gaudi_disable_msi(hdev); 3696 } 3697 3698 static int gaudi_mmu_init(struct hl_device *hdev) 3699 { 3700 struct asic_fixed_properties *prop = &hdev->asic_prop; 3701 struct gaudi_device *gaudi = hdev->asic_specific; 3702 u64 hop0_addr; 3703 int rc, i; 3704 3705 if (!hdev->mmu_enable) 3706 return 0; 3707 3708 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 3709 return 0; 3710 3711 for (i = 0 ; i < prop->max_asid ; i++) { 3712 hop0_addr = prop->mmu_pgt_addr + 3713 (i * prop->mmu_hop_table_size); 3714 3715 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); 3716 if (rc) { 3717 dev_err(hdev->dev, 3718 "failed to set hop0 addr for asid %d\n", i); 3719 goto err; 3720 } 3721 } 3722 3723 /* init MMU cache manage page */ 3724 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8); 3725 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40); 3726 3727 /* mem cache invalidation */ 3728 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); 3729 3730 hl_mmu_invalidate_cache(hdev, true, 0); 3731 3732 WREG32(mmMMU_UP_MMU_ENABLE, 1); 3733 WREG32(mmMMU_UP_SPI_MASK, 0xF); 3734 3735 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440); 3736 3737 /* 3738 * The H/W expects the first PI after init to be 1. After wraparound 3739 * we'll write 0. 
 */
	gaudi->mmu_cache_inv_pi = 1;

	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;

err:
	return rc;
}

static int gaudi_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
}

static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
}

static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
{
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	dynamic_loader = &hdev->fw_loader.dynamic_loader;

	/*
	 * Set initial values for a few specific dynamic regs. Before the
	 * first descriptor is read from the FW, these values must be
	 * hard-coded. In later stages of the protocol they are updated
	 * automatically by reading the FW descriptor, so the data there is
	 * always up-to-date.
	 */
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu =
		cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host =
		cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);

	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
}

static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
{
	struct static_fw_load_mgr *static_loader;

	static_loader = &hdev->fw_loader.static_loader;

	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
	static_loader->cpu_reset_wait_msec = hdev->pldm ?
						GAUDI_PLDM_RESET_WAIT_MSEC :
						GAUDI_CPU_RESET_WAIT_MSEC;
}

static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
{
	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;

	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
}

static void gaudi_init_firmware_loader(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;

	/* fill common fields */
	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
	fw_loader->skip_bmc = !hdev->bmc_enable;
	fw_loader->sram_bar_id = SRAM_BAR_ID;
	fw_loader->dram_bar_id = HBM_BAR_ID;

	if (prop->dynamic_fw_load)
		gaudi_init_dynamic_firmware_loader(hdev);
	else
		gaudi_init_static_firmware_loader(hdev);
}

static int gaudi_init_cpu(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * The device CPU works with 40-bit addresses.
	 * This register sets the extension to 50 bits.
3862 */ 3863 if (!hdev->asic_prop.fw_security_enabled) 3864 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 3865 3866 rc = hl_fw_init_cpu(hdev); 3867 3868 if (rc) 3869 return rc; 3870 3871 gaudi->hw_cap_initialized |= HW_CAP_CPU; 3872 3873 return 0; 3874 } 3875 3876 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3877 { 3878 struct cpu_dyn_regs *dyn_regs = 3879 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3880 struct asic_fixed_properties *prop = &hdev->asic_prop; 3881 struct gaudi_device *gaudi = hdev->asic_specific; 3882 u32 status, irq_handler_offset; 3883 struct hl_eq *eq; 3884 struct hl_hw_queue *cpu_pq = 3885 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 3886 int err; 3887 3888 if (!hdev->cpu_queues_enable) 3889 return 0; 3890 3891 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 3892 return 0; 3893 3894 eq = &hdev->event_queue; 3895 3896 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 3897 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 3898 3899 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 3900 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 3901 3902 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 3903 lower_32_bits(hdev->cpu_accessible_dma_address)); 3904 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 3905 upper_32_bits(hdev->cpu_accessible_dma_address)); 3906 3907 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 3908 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 3909 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 3910 3911 /* Used for EQ CI */ 3912 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 3913 3914 WREG32(mmCPU_IF_PF_PQ_PI, 0); 3915 3916 if (gaudi->multi_msi_mode) 3917 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); 3918 else 3919 WREG32(mmCPU_IF_QUEUE_INIT, 3920 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 3921 3922 irq_handler_offset = prop->gic_interrupts_enable ? 3923 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3924 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 3925 3926 WREG32(irq_handler_offset, 3927 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 3928 3929 err = hl_poll_timeout( 3930 hdev, 3931 mmCPU_IF_QUEUE_INIT, 3932 status, 3933 (status == PQ_INIT_STATUS_READY_FOR_HOST), 3934 1000, 3935 cpu_timeout); 3936 3937 if (err) { 3938 dev_err(hdev->dev, 3939 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 3940 return -EIO; 3941 } 3942 3943 /* update FW application security bits */ 3944 if (prop->fw_cpu_boot_dev_sts0_valid) 3945 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 3946 if (prop->fw_cpu_boot_dev_sts1_valid) 3947 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 3948 3949 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 3950 return 0; 3951 } 3952 3953 static void gaudi_pre_hw_init(struct hl_device *hdev) 3954 { 3955 /* Perform read from the device to make sure device is up */ 3956 RREG32(mmHW_STATE); 3957 3958 if (!hdev->asic_prop.fw_security_enabled) { 3959 /* Set the access through PCI bars (Linux driver only) as 3960 * secured 3961 */ 3962 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 3963 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 3964 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 3965 3966 /* Perform read to flush the waiting writes to ensure 3967 * configuration was set in the device 3968 */ 3969 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 3970 } 3971 3972 /* 3973 * Let's mark in the H/W that we have reached this point. We check 3974 * this value in the reset_before_init function to understand whether 3975 * we need to reset the chip before doing H/W init. 
This register is 3976 * cleared by the H/W upon H/W reset 3977 */ 3978 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 3979 } 3980 3981 static int gaudi_hw_init(struct hl_device *hdev) 3982 { 3983 struct gaudi_device *gaudi = hdev->asic_specific; 3984 int rc; 3985 3986 gaudi_pre_hw_init(hdev); 3987 3988 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 3989 * So we set it here and if anyone tries to move it later to 3990 * a different address, there will be an error 3991 */ 3992 if (hdev->asic_prop.iatu_done_by_fw) 3993 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE; 3994 3995 /* 3996 * Before pushing u-boot/linux to device, need to set the hbm bar to 3997 * base address of dram 3998 */ 3999 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 4000 dev_err(hdev->dev, 4001 "failed to map HBM bar to DRAM base address\n"); 4002 return -EIO; 4003 } 4004 4005 rc = gaudi_init_cpu(hdev); 4006 if (rc) { 4007 dev_err(hdev->dev, "failed to initialize CPU\n"); 4008 return rc; 4009 } 4010 4011 /* In case the clock gating was enabled in preboot we need to disable 4012 * it here before touching the MME/TPC registers. 4013 */ 4014 gaudi_disable_clock_gating(hdev); 4015 4016 /* SRAM scrambler must be initialized after CPU is running from HBM */ 4017 gaudi_init_scrambler_sram(hdev); 4018 4019 /* This is here just in case we are working without CPU */ 4020 gaudi_init_scrambler_hbm(hdev); 4021 4022 gaudi_init_golden_registers(hdev); 4023 4024 rc = gaudi_mmu_init(hdev); 4025 if (rc) 4026 return rc; 4027 4028 gaudi_init_security(hdev); 4029 4030 gaudi_init_pci_dma_qmans(hdev); 4031 4032 gaudi_init_hbm_dma_qmans(hdev); 4033 4034 gaudi_init_mme_qmans(hdev); 4035 4036 gaudi_init_tpc_qmans(hdev); 4037 4038 gaudi_init_nic_qmans(hdev); 4039 4040 gaudi_enable_timestamp(hdev); 4041 4042 /* MSI must be enabled before CPU queues and NIC are initialized */ 4043 rc = gaudi_enable_msi(hdev); 4044 if (rc) 4045 goto disable_queues; 4046 4047 /* must be called after MSI was enabled */ 4048 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 4049 if (rc) { 4050 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 4051 rc); 4052 goto disable_msi; 4053 } 4054 4055 /* Perform read from the device to flush all configuration */ 4056 RREG32(mmHW_STATE); 4057 4058 return 0; 4059 4060 disable_msi: 4061 gaudi_disable_msi(hdev); 4062 disable_queues: 4063 gaudi_disable_mme_qmans(hdev); 4064 gaudi_disable_pci_dma_qmans(hdev); 4065 4066 return rc; 4067 } 4068 4069 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4070 { 4071 struct cpu_dyn_regs *dyn_regs = 4072 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4073 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset; 4074 struct gaudi_device *gaudi = hdev->asic_specific; 4075 bool driver_performs_reset; 4076 4077 if (!hard_reset) { 4078 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 4079 return; 4080 } 4081 4082 if (hdev->pldm) { 4083 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 4084 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 4085 } else { 4086 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; 4087 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 4088 } 4089 4090 if (fw_reset) { 4091 dev_dbg(hdev->dev, 4092 "Firmware performs HARD reset, going to wait %dms\n", 4093 reset_timeout_ms); 4094 4095 goto skip_reset; 4096 } 4097 4098 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && 4099 !hdev->asic_prop.hard_reset_done_by_fw); 4100 4101 /* Set device to handle FLR by H/W as we will 
put the device CPU to 4102 * halt mode 4103 */ 4104 if (driver_performs_reset) 4105 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 4106 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 4107 4108 /* If linux is loaded in the device CPU we need to communicate with it 4109 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU 4110 * registers in case of old F/Ws 4111 */ 4112 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { 4113 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4114 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4115 le32_to_cpu(dyn_regs->gic_host_halt_irq); 4116 4117 WREG32(irq_handler_offset, 4118 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); 4119 4120 /* This is a hail-mary attempt to revive the card in the small chance that the 4121 * f/w has experienced a watchdog event, which caused it to return back to preboot. 4122 * In that case, triggering reset through GIC won't help. We need to trigger the 4123 * reset as if Linux wasn't loaded. 4124 * 4125 * We do it only if the reset cause was HB, because that would be the indication 4126 * of such an event. 4127 * 4128 * In case watchdog hasn't expired but we still got HB, then this won't do any 4129 * damage. 4130 */ 4131 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { 4132 if (hdev->asic_prop.hard_reset_done_by_fw) 4133 hl_fw_ask_hard_reset_without_linux(hdev); 4134 else 4135 hl_fw_ask_halt_machine_without_linux(hdev); 4136 } 4137 } else { 4138 if (hdev->asic_prop.hard_reset_done_by_fw) 4139 hl_fw_ask_hard_reset_without_linux(hdev); 4140 else 4141 hl_fw_ask_halt_machine_without_linux(hdev); 4142 } 4143 4144 if (driver_performs_reset) { 4145 4146 /* Configure the reset registers. Must be done as early as 4147 * possible in case we fail during H/W initialization 4148 */ 4149 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 4150 (CFG_RST_H_DMA_MASK | 4151 CFG_RST_H_MME_MASK | 4152 CFG_RST_H_SM_MASK | 4153 CFG_RST_H_TPC_7_MASK)); 4154 4155 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 4156 4157 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 4158 (CFG_RST_H_HBM_MASK | 4159 CFG_RST_H_TPC_7_MASK | 4160 CFG_RST_H_NIC_MASK | 4161 CFG_RST_H_SM_MASK | 4162 CFG_RST_H_DMA_MASK | 4163 CFG_RST_H_MME_MASK | 4164 CFG_RST_H_CPU_MASK | 4165 CFG_RST_H_MMU_MASK)); 4166 4167 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 4168 (CFG_RST_L_IF_MASK | 4169 CFG_RST_L_PSOC_MASK | 4170 CFG_RST_L_TPC_MASK)); 4171 4172 msleep(cpu_timeout_ms); 4173 4174 /* Tell ASIC not to re-initialize PCIe */ 4175 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 4176 4177 /* Restart BTL/BLR upon hard-reset */ 4178 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 4179 4180 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 4181 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 4182 4183 dev_dbg(hdev->dev, 4184 "Issued HARD reset command, going to wait %dms\n", 4185 reset_timeout_ms); 4186 } else { 4187 dev_dbg(hdev->dev, 4188 "Firmware performs HARD reset, going to wait %dms\n", 4189 reset_timeout_ms); 4190 } 4191 4192 skip_reset: 4193 /* 4194 * After hard reset, we can't poll the BTM_FSM register because the PSOC 4195 * itself is in reset. 
Need to wait until the reset is deasserted 4196 */ 4197 msleep(reset_timeout_ms); 4198 4199 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 4200 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) 4201 dev_err(hdev->dev, 4202 "Timeout while waiting for device to reset 0x%x\n", 4203 status); 4204 4205 if (gaudi) { 4206 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | 4207 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | 4208 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | 4209 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | 4210 HW_CAP_HBM_SCRAMBLER); 4211 4212 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 4213 4214 hdev->device_cpu_is_halted = false; 4215 } 4216 } 4217 4218 static int gaudi_suspend(struct hl_device *hdev) 4219 { 4220 int rc; 4221 4222 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 4223 if (rc) 4224 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 4225 4226 return rc; 4227 } 4228 4229 static int gaudi_resume(struct hl_device *hdev) 4230 { 4231 return gaudi_init_iatu(hdev); 4232 } 4233 4234 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 4235 void *cpu_addr, dma_addr_t dma_addr, size_t size) 4236 { 4237 int rc; 4238 4239 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4240 VM_DONTCOPY | VM_NORESERVE; 4241 4242 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4243 (dma_addr - HOST_PHYS_BASE), size); 4244 if (rc) 4245 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4246 4247 return rc; 4248 } 4249 4250 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 4251 { 4252 struct cpu_dyn_regs *dyn_regs = 4253 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4254 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset; 4255 struct gaudi_device *gaudi = hdev->asic_specific; 4256 bool invalid_queue = false; 4257 int dma_id; 4258 4259 switch (hw_queue_id) { 4260 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 4261 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 4262 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4263 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4264 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4265 break; 4266 4267 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 4268 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 4269 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4270 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4271 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4272 break; 4273 4274 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 4275 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 4276 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4277 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4278 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4279 break; 4280 4281 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 4282 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 4283 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4284 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4285 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4286 break; 4287 4288 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 4289 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 4290 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4291 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4292 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4293 break; 4294 4295 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 4296 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 4297 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4298 q_off = dma_qm_offset + 
((hw_queue_id - 1) & 0x3) * 4; 4299 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4300 break; 4301 4302 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 4303 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 4304 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4305 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4306 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4307 break; 4308 4309 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 4310 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6]; 4311 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4312 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4313 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4314 break; 4315 4316 case GAUDI_QUEUE_ID_CPU_PQ: 4317 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4318 db_reg_offset = mmCPU_IF_PF_PQ_PI; 4319 else 4320 invalid_queue = true; 4321 break; 4322 4323 case GAUDI_QUEUE_ID_MME_0_0: 4324 db_reg_offset = mmMME2_QM_PQ_PI_0; 4325 break; 4326 4327 case GAUDI_QUEUE_ID_MME_0_1: 4328 db_reg_offset = mmMME2_QM_PQ_PI_1; 4329 break; 4330 4331 case GAUDI_QUEUE_ID_MME_0_2: 4332 db_reg_offset = mmMME2_QM_PQ_PI_2; 4333 break; 4334 4335 case GAUDI_QUEUE_ID_MME_0_3: 4336 db_reg_offset = mmMME2_QM_PQ_PI_3; 4337 break; 4338 4339 case GAUDI_QUEUE_ID_MME_1_0: 4340 db_reg_offset = mmMME0_QM_PQ_PI_0; 4341 break; 4342 4343 case GAUDI_QUEUE_ID_MME_1_1: 4344 db_reg_offset = mmMME0_QM_PQ_PI_1; 4345 break; 4346 4347 case GAUDI_QUEUE_ID_MME_1_2: 4348 db_reg_offset = mmMME0_QM_PQ_PI_2; 4349 break; 4350 4351 case GAUDI_QUEUE_ID_MME_1_3: 4352 db_reg_offset = mmMME0_QM_PQ_PI_3; 4353 break; 4354 4355 case GAUDI_QUEUE_ID_TPC_0_0: 4356 db_reg_offset = mmTPC0_QM_PQ_PI_0; 4357 break; 4358 4359 case GAUDI_QUEUE_ID_TPC_0_1: 4360 db_reg_offset = mmTPC0_QM_PQ_PI_1; 4361 break; 4362 4363 case GAUDI_QUEUE_ID_TPC_0_2: 4364 db_reg_offset = mmTPC0_QM_PQ_PI_2; 4365 break; 4366 4367 case GAUDI_QUEUE_ID_TPC_0_3: 4368 db_reg_offset = mmTPC0_QM_PQ_PI_3; 4369 break; 4370 4371 case GAUDI_QUEUE_ID_TPC_1_0: 4372 db_reg_offset = mmTPC1_QM_PQ_PI_0; 4373 break; 4374 4375 case GAUDI_QUEUE_ID_TPC_1_1: 4376 db_reg_offset = mmTPC1_QM_PQ_PI_1; 4377 break; 4378 4379 case GAUDI_QUEUE_ID_TPC_1_2: 4380 db_reg_offset = mmTPC1_QM_PQ_PI_2; 4381 break; 4382 4383 case GAUDI_QUEUE_ID_TPC_1_3: 4384 db_reg_offset = mmTPC1_QM_PQ_PI_3; 4385 break; 4386 4387 case GAUDI_QUEUE_ID_TPC_2_0: 4388 db_reg_offset = mmTPC2_QM_PQ_PI_0; 4389 break; 4390 4391 case GAUDI_QUEUE_ID_TPC_2_1: 4392 db_reg_offset = mmTPC2_QM_PQ_PI_1; 4393 break; 4394 4395 case GAUDI_QUEUE_ID_TPC_2_2: 4396 db_reg_offset = mmTPC2_QM_PQ_PI_2; 4397 break; 4398 4399 case GAUDI_QUEUE_ID_TPC_2_3: 4400 db_reg_offset = mmTPC2_QM_PQ_PI_3; 4401 break; 4402 4403 case GAUDI_QUEUE_ID_TPC_3_0: 4404 db_reg_offset = mmTPC3_QM_PQ_PI_0; 4405 break; 4406 4407 case GAUDI_QUEUE_ID_TPC_3_1: 4408 db_reg_offset = mmTPC3_QM_PQ_PI_1; 4409 break; 4410 4411 case GAUDI_QUEUE_ID_TPC_3_2: 4412 db_reg_offset = mmTPC3_QM_PQ_PI_2; 4413 break; 4414 4415 case GAUDI_QUEUE_ID_TPC_3_3: 4416 db_reg_offset = mmTPC3_QM_PQ_PI_3; 4417 break; 4418 4419 case GAUDI_QUEUE_ID_TPC_4_0: 4420 db_reg_offset = mmTPC4_QM_PQ_PI_0; 4421 break; 4422 4423 case GAUDI_QUEUE_ID_TPC_4_1: 4424 db_reg_offset = mmTPC4_QM_PQ_PI_1; 4425 break; 4426 4427 case GAUDI_QUEUE_ID_TPC_4_2: 4428 db_reg_offset = mmTPC4_QM_PQ_PI_2; 4429 break; 4430 4431 case GAUDI_QUEUE_ID_TPC_4_3: 4432 db_reg_offset = mmTPC4_QM_PQ_PI_3; 4433 break; 4434 4435 case GAUDI_QUEUE_ID_TPC_5_0: 4436 db_reg_offset = mmTPC5_QM_PQ_PI_0; 4437 break; 4438 4439 case GAUDI_QUEUE_ID_TPC_5_1: 4440 db_reg_offset = mmTPC5_QM_PQ_PI_1; 
4441 break; 4442 4443 case GAUDI_QUEUE_ID_TPC_5_2: 4444 db_reg_offset = mmTPC5_QM_PQ_PI_2; 4445 break; 4446 4447 case GAUDI_QUEUE_ID_TPC_5_3: 4448 db_reg_offset = mmTPC5_QM_PQ_PI_3; 4449 break; 4450 4451 case GAUDI_QUEUE_ID_TPC_6_0: 4452 db_reg_offset = mmTPC6_QM_PQ_PI_0; 4453 break; 4454 4455 case GAUDI_QUEUE_ID_TPC_6_1: 4456 db_reg_offset = mmTPC6_QM_PQ_PI_1; 4457 break; 4458 4459 case GAUDI_QUEUE_ID_TPC_6_2: 4460 db_reg_offset = mmTPC6_QM_PQ_PI_2; 4461 break; 4462 4463 case GAUDI_QUEUE_ID_TPC_6_3: 4464 db_reg_offset = mmTPC6_QM_PQ_PI_3; 4465 break; 4466 4467 case GAUDI_QUEUE_ID_TPC_7_0: 4468 db_reg_offset = mmTPC7_QM_PQ_PI_0; 4469 break; 4470 4471 case GAUDI_QUEUE_ID_TPC_7_1: 4472 db_reg_offset = mmTPC7_QM_PQ_PI_1; 4473 break; 4474 4475 case GAUDI_QUEUE_ID_TPC_7_2: 4476 db_reg_offset = mmTPC7_QM_PQ_PI_2; 4477 break; 4478 4479 case GAUDI_QUEUE_ID_TPC_7_3: 4480 db_reg_offset = mmTPC7_QM_PQ_PI_3; 4481 break; 4482 4483 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3: 4484 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0)) 4485 invalid_queue = true; 4486 4487 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4488 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off; 4489 break; 4490 4491 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3: 4492 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1)) 4493 invalid_queue = true; 4494 4495 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4496 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off; 4497 break; 4498 4499 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3: 4500 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2)) 4501 invalid_queue = true; 4502 4503 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4504 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off; 4505 break; 4506 4507 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3: 4508 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3)) 4509 invalid_queue = true; 4510 4511 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4512 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off; 4513 break; 4514 4515 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3: 4516 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4)) 4517 invalid_queue = true; 4518 4519 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4520 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off; 4521 break; 4522 4523 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3: 4524 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5)) 4525 invalid_queue = true; 4526 4527 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4528 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off; 4529 break; 4530 4531 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3: 4532 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6)) 4533 invalid_queue = true; 4534 4535 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4536 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off; 4537 break; 4538 4539 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3: 4540 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7)) 4541 invalid_queue = true; 4542 4543 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4544 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off; 4545 break; 4546 4547 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3: 4548 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8)) 4549 invalid_queue = true; 4550 4551 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4552 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off; 4553 break; 4554 4555 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3: 4556 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9)) 4557 invalid_queue = true; 4558 4559 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4560 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off; 4561 break; 4562 4563 default: 4564 invalid_queue = true; 4565 } 4566 4567 if (invalid_queue) { 4568 /* 
Should never get here */
		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
		/* make sure device CPU will read latest data from host */
		mb();

		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);

		WREG32(irq_handler_offset,
			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
	}
}

static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
				struct hl_bd *bd)
{
	__le64 *pbd = (__le64 *) bd;

	/* The QMANs are on the host memory so a simple copy suffices */
	pqe[0] = pbd[0];
	pqe[1] = pbd[1];
}

static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
						dma_handle, flags);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;

	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}

static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 cur_addr = prop->dram_user_base_address;
	u32 chunk_size, busy;
	int rc, dma_id;

	while (cur_addr < prop->dram_end_address) {
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			chunk_size =
			min((u64)SZ_2G, prop->dram_end_address - cur_addr);

			dev_dbg(hdev->dev,
				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
				cur_addr, cur_addr + chunk_size);

			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
					lower_32_bits(val));
			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
					upper_32_bits(val));
			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
					lower_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
					upper_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
					chunk_size);
			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
				((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));

			cur_addr += chunk_size;

			if (cur_addr == prop->dram_end_address)
				break;
		}

		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			rc = hl_poll_timeout(
				hdev,
				mmDMA0_CORE_STS0 + dma_offset,
				busy,
				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
				1000,
				HBM_SCRUBBING_TIMEOUT_US);

			if (rc) {
				dev_err(hdev->dev,
					"DMA Timeout during HBM scrubbing of DMA #%d\n",
					dma_id);
				return -EIO;
			}
		}
	}

	return 0;
}

static int gaudi_scrub_device_mem(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 wait_to_idle_time = hdev->pdev ?
HBM_SCRUBBING_TIMEOUT_US : 4689 min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US); 4690 u64 addr, size, val = hdev->memory_scrub_val; 4691 ktime_t timeout; 4692 int rc = 0; 4693 4694 if (!hdev->memory_scrub) 4695 return 0; 4696 4697 timeout = ktime_add_us(ktime_get(), wait_to_idle_time); 4698 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 4699 if (ktime_compare(ktime_get(), timeout) > 0) { 4700 dev_err(hdev->dev, "waiting for idle timeout\n"); 4701 return -ETIMEDOUT; 4702 } 4703 usleep_range((1000 >> 2) + 1, 1000); 4704 } 4705 4706 /* Scrub SRAM */ 4707 addr = prop->sram_user_base_address; 4708 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; 4709 4710 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4711 addr, addr + size, val); 4712 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4713 if (rc) { 4714 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc); 4715 return rc; 4716 } 4717 4718 /* Scrub HBM using all DMA channels in parallel */ 4719 rc = gaudi_scrub_device_dram(hdev, val); 4720 if (rc) { 4721 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc); 4722 return rc; 4723 } 4724 4725 return 0; 4726 } 4727 4728 static void *gaudi_get_int_queue_base(struct hl_device *hdev, 4729 u32 queue_id, dma_addr_t *dma_handle, 4730 u16 *queue_len) 4731 { 4732 struct gaudi_device *gaudi = hdev->asic_specific; 4733 struct gaudi_internal_qman_info *q; 4734 4735 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 4736 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 4737 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 4738 return NULL; 4739 } 4740 4741 q = &gaudi->internal_qmans[queue_id]; 4742 *dma_handle = q->pq_dma_addr; 4743 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; 4744 4745 return q->pq_kernel_addr; 4746 } 4747 4748 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 4749 u16 len, u32 timeout, u64 *result) 4750 { 4751 struct gaudi_device *gaudi = hdev->asic_specific; 4752 4753 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 4754 if (result) 4755 *result = 0; 4756 return 0; 4757 } 4758 4759 if (!timeout) 4760 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 4761 4762 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 4763 timeout, result); 4764 } 4765 4766 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 4767 { 4768 struct packet_msg_prot *fence_pkt; 4769 dma_addr_t pkt_dma_addr; 4770 u32 fence_val, tmp, timeout_usec; 4771 dma_addr_t fence_dma_addr; 4772 u32 *fence_ptr; 4773 int rc; 4774 4775 if (hdev->pldm) 4776 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 4777 else 4778 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 4779 4780 fence_val = GAUDI_QMAN0_FENCE_VAL; 4781 4782 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 4783 if (!fence_ptr) { 4784 dev_err(hdev->dev, 4785 "Failed to allocate memory for H/W queue %d testing\n", 4786 hw_queue_id); 4787 return -ENOMEM; 4788 } 4789 4790 *fence_ptr = 0; 4791 4792 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL, 4793 &pkt_dma_addr); 4794 if (!fence_pkt) { 4795 dev_err(hdev->dev, 4796 "Failed to allocate packet for H/W queue %d testing\n", 4797 hw_queue_id); 4798 rc = -ENOMEM; 4799 goto free_fence_ptr; 4800 } 4801 4802 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4803 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4804 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4805 4806 fence_pkt->ctl = cpu_to_le32(tmp); 4807 fence_pkt->value = 
cpu_to_le32(fence_val); 4808 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4809 4810 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 4811 sizeof(struct packet_msg_prot), 4812 pkt_dma_addr); 4813 if (rc) { 4814 dev_err(hdev->dev, 4815 "Failed to send fence packet to H/W queue %d\n", 4816 hw_queue_id); 4817 goto free_pkt; 4818 } 4819 4820 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 4821 1000, timeout_usec, true); 4822 4823 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 4824 4825 if (rc == -ETIMEDOUT) { 4826 dev_err(hdev->dev, 4827 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 4828 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 4829 rc = -EIO; 4830 } 4831 4832 free_pkt: 4833 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr); 4834 free_fence_ptr: 4835 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 4836 return rc; 4837 } 4838 4839 static int gaudi_test_cpu_queue(struct hl_device *hdev) 4840 { 4841 struct gaudi_device *gaudi = hdev->asic_specific; 4842 4843 /* 4844 * check capability here as send_cpu_message() won't update the result 4845 * value if no capability 4846 */ 4847 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 4848 return 0; 4849 4850 return hl_fw_test_cpu_queue(hdev); 4851 } 4852 4853 static int gaudi_test_queues(struct hl_device *hdev) 4854 { 4855 int i, rc, ret_val = 0; 4856 4857 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 4858 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 4859 rc = gaudi_test_queue(hdev, i); 4860 if (rc) 4861 ret_val = -EINVAL; 4862 } 4863 } 4864 4865 rc = gaudi_test_cpu_queue(hdev); 4866 if (rc) 4867 ret_val = -EINVAL; 4868 4869 return ret_val; 4870 } 4871 4872 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 4873 gfp_t mem_flags, dma_addr_t *dma_handle) 4874 { 4875 void *kernel_addr; 4876 4877 if (size > GAUDI_DMA_POOL_BLK_SIZE) 4878 return NULL; 4879 4880 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 4881 4882 /* Shift to the device's base physical address of host memory */ 4883 if (kernel_addr) 4884 *dma_handle += HOST_PHYS_BASE; 4885 4886 return kernel_addr; 4887 } 4888 4889 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 4890 dma_addr_t dma_addr) 4891 { 4892 /* Cancel the device's base physical address of host memory */ 4893 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 4894 4895 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 4896 } 4897 4898 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 4899 size_t size, dma_addr_t *dma_handle) 4900 { 4901 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 4902 } 4903 4904 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 4905 size_t size, void *vaddr) 4906 { 4907 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 4908 } 4909 4910 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) 4911 { 4912 struct scatterlist *sg, *sg_next_iter; 4913 u32 count, dma_desc_cnt; 4914 u64 len, len_next; 4915 dma_addr_t addr, addr_next; 4916 4917 dma_desc_cnt = 0; 4918 4919 for_each_sgtable_dma_sg(sgt, sg, count) { 4920 len = sg_dma_len(sg); 4921 addr = sg_dma_address(sg); 4922 4923 if (len == 0) 4924 break; 4925 4926 while ((count + 1) < sgt->nents) { 4927 sg_next_iter = sg_next(sg); 4928 len_next = sg_dma_len(sg_next_iter); 4929 addr_next = sg_dma_address(sg_next_iter); 4930 4931 if (len_next == 0) 4932 break; 4933 4934 if ((addr + len == 
addr_next) && 4935 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 4936 len += len_next; 4937 count++; 4938 sg = sg_next_iter; 4939 } else { 4940 break; 4941 } 4942 } 4943 4944 dma_desc_cnt++; 4945 } 4946 4947 return dma_desc_cnt * sizeof(struct packet_lin_dma); 4948 } 4949 4950 static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 4951 struct hl_cs_parser *parser, 4952 struct packet_lin_dma *user_dma_pkt, 4953 u64 addr, enum dma_data_direction dir) 4954 { 4955 struct hl_userptr *userptr; 4956 int rc; 4957 4958 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4959 parser->job_userptr_list, &userptr)) 4960 goto already_pinned; 4961 4962 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 4963 if (!userptr) 4964 return -ENOMEM; 4965 4966 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4967 userptr); 4968 if (rc) 4969 goto free_userptr; 4970 4971 list_add_tail(&userptr->job_node, parser->job_userptr_list); 4972 4973 rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); 4974 if (rc) { 4975 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 4976 goto unpin_memory; 4977 } 4978 4979 userptr->dma_mapped = true; 4980 userptr->dir = dir; 4981 4982 already_pinned: 4983 parser->patched_cb_size += 4984 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 4985 4986 return 0; 4987 4988 unpin_memory: 4989 list_del(&userptr->job_node); 4990 hl_unpin_host_memory(hdev, userptr); 4991 free_userptr: 4992 kfree(userptr); 4993 return rc; 4994 } 4995 4996 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, 4997 struct hl_cs_parser *parser, 4998 struct packet_lin_dma *user_dma_pkt, 4999 bool src_in_host) 5000 { 5001 enum dma_data_direction dir; 5002 bool skip_host_mem_pin = false, user_memset; 5003 u64 addr; 5004 int rc = 0; 5005 5006 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 5007 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5008 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5009 5010 if (src_in_host) { 5011 if (user_memset) 5012 skip_host_mem_pin = true; 5013 5014 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 5015 dir = DMA_TO_DEVICE; 5016 addr = le64_to_cpu(user_dma_pkt->src_addr); 5017 } else { 5018 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 5019 dir = DMA_FROM_DEVICE; 5020 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 5021 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 5022 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 5023 } 5024 5025 if (skip_host_mem_pin) 5026 parser->patched_cb_size += sizeof(*user_dma_pkt); 5027 else 5028 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 5029 addr, dir); 5030 5031 return rc; 5032 } 5033 5034 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 5035 struct hl_cs_parser *parser, 5036 struct packet_lin_dma *user_dma_pkt) 5037 { 5038 bool src_in_host = false; 5039 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 5040 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 5041 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 5042 5043 dev_dbg(hdev->dev, "DMA packet details:\n"); 5044 dev_dbg(hdev->dev, "source == 0x%llx\n", 5045 le64_to_cpu(user_dma_pkt->src_addr)); 5046 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 5047 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); 5048 5049 /* 5050 * Special handling for DMA with size 0. 
Bypass all validations 5051 * because no transactions will be done except for WR_COMP, which 5052 * is not a security issue 5053 */ 5054 if (!le32_to_cpu(user_dma_pkt->tsize)) { 5055 parser->patched_cb_size += sizeof(*user_dma_pkt); 5056 return 0; 5057 } 5058 5059 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5060 src_in_host = true; 5061 5062 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 5063 src_in_host); 5064 } 5065 5066 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 5067 struct hl_cs_parser *parser, 5068 struct packet_load_and_exe *user_pkt) 5069 { 5070 u32 cfg; 5071 5072 cfg = le32_to_cpu(user_pkt->cfg); 5073 5074 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 5075 dev_err(hdev->dev, 5076 "User not allowed to use Load and Execute\n"); 5077 return -EPERM; 5078 } 5079 5080 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 5081 5082 return 0; 5083 } 5084 5085 static int gaudi_validate_cb(struct hl_device *hdev, 5086 struct hl_cs_parser *parser, bool is_mmu) 5087 { 5088 u32 cb_parsed_length = 0; 5089 int rc = 0; 5090 5091 parser->patched_cb_size = 0; 5092 5093 /* cb_user_size is more than 0 so loop will always be executed */ 5094 while (cb_parsed_length < parser->user_cb_size) { 5095 enum packet_id pkt_id; 5096 u16 pkt_size; 5097 struct gaudi_packet *user_pkt; 5098 5099 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5100 5101 pkt_id = (enum packet_id) ( 5102 (le64_to_cpu(user_pkt->header) & 5103 PACKET_HEADER_PACKET_ID_MASK) >> 5104 PACKET_HEADER_PACKET_ID_SHIFT); 5105 5106 if (!validate_packet_id(pkt_id)) { 5107 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5108 rc = -EINVAL; 5109 break; 5110 } 5111 5112 pkt_size = gaudi_packet_sizes[pkt_id]; 5113 cb_parsed_length += pkt_size; 5114 if (cb_parsed_length > parser->user_cb_size) { 5115 dev_err(hdev->dev, 5116 "packet 0x%x is out of CB boundary\n", pkt_id); 5117 rc = -EINVAL; 5118 break; 5119 } 5120 5121 switch (pkt_id) { 5122 case PACKET_MSG_PROT: 5123 dev_err(hdev->dev, 5124 "User not allowed to use MSG_PROT\n"); 5125 rc = -EPERM; 5126 break; 5127 5128 case PACKET_CP_DMA: 5129 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5130 rc = -EPERM; 5131 break; 5132 5133 case PACKET_STOP: 5134 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5135 rc = -EPERM; 5136 break; 5137 5138 case PACKET_WREG_BULK: 5139 dev_err(hdev->dev, 5140 "User not allowed to use WREG_BULK\n"); 5141 rc = -EPERM; 5142 break; 5143 5144 case PACKET_LOAD_AND_EXE: 5145 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 5146 (struct packet_load_and_exe *) user_pkt); 5147 break; 5148 5149 case PACKET_LIN_DMA: 5150 parser->contains_dma_pkt = true; 5151 if (is_mmu) 5152 parser->patched_cb_size += pkt_size; 5153 else 5154 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 5155 (struct packet_lin_dma *) user_pkt); 5156 break; 5157 5158 case PACKET_WREG_32: 5159 case PACKET_MSG_LONG: 5160 case PACKET_MSG_SHORT: 5161 case PACKET_REPEAT: 5162 case PACKET_FENCE: 5163 case PACKET_NOP: 5164 case PACKET_ARB_POINT: 5165 parser->patched_cb_size += pkt_size; 5166 break; 5167 5168 default: 5169 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5170 pkt_id); 5171 rc = -EINVAL; 5172 break; 5173 } 5174 5175 if (rc) 5176 break; 5177 } 5178 5179 /* 5180 * The new CB should have space at the end for two MSG_PROT packets: 5181 * 1. Optional NOP padding for cacheline alignment 5182 * 2. A packet that will act as a completion packet 5183 * 3. 
A packet that will generate MSI interrupt 5184 */ 5185 if (parser->completion) 5186 parser->patched_cb_size += gaudi_get_patched_cb_extra_size( 5187 parser->patched_cb_size); 5188 5189 return rc; 5190 } 5191 5192 static int gaudi_patch_dma_packet(struct hl_device *hdev, 5193 struct hl_cs_parser *parser, 5194 struct packet_lin_dma *user_dma_pkt, 5195 struct packet_lin_dma *new_dma_pkt, 5196 u32 *new_dma_pkt_size) 5197 { 5198 struct hl_userptr *userptr; 5199 struct scatterlist *sg, *sg_next_iter; 5200 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 5201 u64 len, len_next; 5202 dma_addr_t dma_addr, dma_addr_next; 5203 u64 device_memory_addr, addr; 5204 enum dma_data_direction dir; 5205 struct sg_table *sgt; 5206 bool src_in_host = false; 5207 bool skip_host_mem_pin = false; 5208 bool user_memset; 5209 5210 ctl = le32_to_cpu(user_dma_pkt->ctl); 5211 5212 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5213 src_in_host = true; 5214 5215 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5216 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5217 5218 if (src_in_host) { 5219 addr = le64_to_cpu(user_dma_pkt->src_addr); 5220 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 5221 dir = DMA_TO_DEVICE; 5222 if (user_memset) 5223 skip_host_mem_pin = true; 5224 } else { 5225 addr = le64_to_cpu(user_dma_pkt->dst_addr); 5226 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 5227 dir = DMA_FROM_DEVICE; 5228 } 5229 5230 if ((!skip_host_mem_pin) && 5231 (!hl_userptr_is_pinned(hdev, addr, 5232 le32_to_cpu(user_dma_pkt->tsize), 5233 parser->job_userptr_list, &userptr))) { 5234 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 5235 addr, user_dma_pkt->tsize); 5236 return -EFAULT; 5237 } 5238 5239 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 5240 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 5241 *new_dma_pkt_size = sizeof(*user_dma_pkt); 5242 return 0; 5243 } 5244 5245 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5246 5247 sgt = userptr->sgt; 5248 dma_desc_cnt = 0; 5249 5250 for_each_sgtable_dma_sg(sgt, sg, count) { 5251 len = sg_dma_len(sg); 5252 dma_addr = sg_dma_address(sg); 5253 5254 if (len == 0) 5255 break; 5256 5257 while ((count + 1) < sgt->nents) { 5258 sg_next_iter = sg_next(sg); 5259 len_next = sg_dma_len(sg_next_iter); 5260 dma_addr_next = sg_dma_address(sg_next_iter); 5261 5262 if (len_next == 0) 5263 break; 5264 5265 if ((dma_addr + len == dma_addr_next) && 5266 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5267 len += len_next; 5268 count++; 5269 sg = sg_next_iter; 5270 } else { 5271 break; 5272 } 5273 } 5274 5275 ctl = le32_to_cpu(user_dma_pkt->ctl); 5276 if (likely(dma_desc_cnt)) 5277 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 5278 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5279 new_dma_pkt->ctl = cpu_to_le32(ctl); 5280 new_dma_pkt->tsize = cpu_to_le32(len); 5281 5282 if (dir == DMA_TO_DEVICE) { 5283 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 5284 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 5285 } else { 5286 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 5287 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 5288 } 5289 5290 if (!user_memset) 5291 device_memory_addr += len; 5292 dma_desc_cnt++; 5293 new_dma_pkt++; 5294 } 5295 5296 if (!dma_desc_cnt) { 5297 dev_err(hdev->dev, 5298 "Error of 0 SG entries when patching DMA packet\n"); 5299 return -EFAULT; 5300 } 5301 5302 /* Fix the last dma packet - wrcomp must be as user set it */ 5303 new_dma_pkt--; 5304 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 5305 5306 
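	/* Report the total size of the expanded LIN_DMA packets so the
	 * caller (gaudi_patch_cb) can advance its write offset inside the
	 * patched CB accordingly.
	 */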
*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); 5307 5308 return 0; 5309 } 5310 5311 static int gaudi_patch_cb(struct hl_device *hdev, 5312 struct hl_cs_parser *parser) 5313 { 5314 u32 cb_parsed_length = 0; 5315 u32 cb_patched_cur_length = 0; 5316 int rc = 0; 5317 5318 /* cb_user_size is more than 0 so loop will always be executed */ 5319 while (cb_parsed_length < parser->user_cb_size) { 5320 enum packet_id pkt_id; 5321 u16 pkt_size; 5322 u32 new_pkt_size = 0; 5323 struct gaudi_packet *user_pkt, *kernel_pkt; 5324 5325 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5326 kernel_pkt = parser->patched_cb->kernel_address + 5327 cb_patched_cur_length; 5328 5329 pkt_id = (enum packet_id) ( 5330 (le64_to_cpu(user_pkt->header) & 5331 PACKET_HEADER_PACKET_ID_MASK) >> 5332 PACKET_HEADER_PACKET_ID_SHIFT); 5333 5334 if (!validate_packet_id(pkt_id)) { 5335 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5336 rc = -EINVAL; 5337 break; 5338 } 5339 5340 pkt_size = gaudi_packet_sizes[pkt_id]; 5341 cb_parsed_length += pkt_size; 5342 if (cb_parsed_length > parser->user_cb_size) { 5343 dev_err(hdev->dev, 5344 "packet 0x%x is out of CB boundary\n", pkt_id); 5345 rc = -EINVAL; 5346 break; 5347 } 5348 5349 switch (pkt_id) { 5350 case PACKET_LIN_DMA: 5351 rc = gaudi_patch_dma_packet(hdev, parser, 5352 (struct packet_lin_dma *) user_pkt, 5353 (struct packet_lin_dma *) kernel_pkt, 5354 &new_pkt_size); 5355 cb_patched_cur_length += new_pkt_size; 5356 break; 5357 5358 case PACKET_MSG_PROT: 5359 dev_err(hdev->dev, 5360 "User not allowed to use MSG_PROT\n"); 5361 rc = -EPERM; 5362 break; 5363 5364 case PACKET_CP_DMA: 5365 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5366 rc = -EPERM; 5367 break; 5368 5369 case PACKET_STOP: 5370 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5371 rc = -EPERM; 5372 break; 5373 5374 case PACKET_WREG_32: 5375 case PACKET_WREG_BULK: 5376 case PACKET_MSG_LONG: 5377 case PACKET_MSG_SHORT: 5378 case PACKET_REPEAT: 5379 case PACKET_FENCE: 5380 case PACKET_NOP: 5381 case PACKET_ARB_POINT: 5382 case PACKET_LOAD_AND_EXE: 5383 memcpy(kernel_pkt, user_pkt, pkt_size); 5384 cb_patched_cur_length += pkt_size; 5385 break; 5386 5387 default: 5388 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5389 pkt_id); 5390 rc = -EINVAL; 5391 break; 5392 } 5393 5394 if (rc) 5395 break; 5396 } 5397 5398 return rc; 5399 } 5400 5401 static int gaudi_parse_cb_mmu(struct hl_device *hdev, 5402 struct hl_cs_parser *parser) 5403 { 5404 u64 handle; 5405 u32 patched_cb_size; 5406 struct hl_cb *user_cb; 5407 int rc; 5408 5409 /* 5410 * The new CB should have space at the end for two MSG_PROT packets: 5411 * 1. Optional NOP padding for cacheline alignment 5412 * 2. A packet that will act as a completion packet 5413 * 3. 
A packet that will generate MSI interrupt 5414 */ 5415 if (parser->completion) 5416 parser->patched_cb_size = parser->user_cb_size + 5417 gaudi_get_patched_cb_extra_size(parser->user_cb_size); 5418 else 5419 parser->patched_cb_size = parser->user_cb_size; 5420 5421 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5422 parser->patched_cb_size, false, false, 5423 &handle); 5424 5425 if (rc) { 5426 dev_err(hdev->dev, 5427 "Failed to allocate patched CB for DMA CS %d\n", 5428 rc); 5429 return rc; 5430 } 5431 5432 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5433 /* hl_cb_get should never fail */ 5434 if (!parser->patched_cb) { 5435 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5436 rc = -EFAULT; 5437 goto out; 5438 } 5439 5440 /* 5441 * We are protected from overflow because the check 5442 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk() 5443 * in the common code. That check is done only if is_kernel_allocated_cb is true. 5444 * 5445 * There is no option to reach here without going through that check because: 5446 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to 5447 * an external queue. 5448 * 2. For Gaudi, we only parse CBs that were submitted to the external queues. 5449 */ 5450 memcpy(parser->patched_cb->kernel_address, 5451 parser->user_cb->kernel_address, 5452 parser->user_cb_size); 5453 5454 patched_cb_size = parser->patched_cb_size; 5455 5456 /* Validate patched CB instead of user CB */ 5457 user_cb = parser->user_cb; 5458 parser->user_cb = parser->patched_cb; 5459 rc = gaudi_validate_cb(hdev, parser, true); 5460 parser->user_cb = user_cb; 5461 5462 if (rc) { 5463 hl_cb_put(parser->patched_cb); 5464 goto out; 5465 } 5466 5467 if (patched_cb_size != parser->patched_cb_size) { 5468 dev_err(hdev->dev, "user CB size mismatch\n"); 5469 hl_cb_put(parser->patched_cb); 5470 rc = -EINVAL; 5471 goto out; 5472 } 5473 5474 out: 5475 /* 5476 * Always call cb destroy here because we still have 1 reference 5477 * to it by calling cb_get earlier. After the job will be completed, 5478 * cb_put will release it, but here we want to remove it from the 5479 * idr 5480 */ 5481 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5482 5483 return rc; 5484 } 5485 5486 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 5487 struct hl_cs_parser *parser) 5488 { 5489 u64 handle; 5490 int rc; 5491 5492 rc = gaudi_validate_cb(hdev, parser, false); 5493 5494 if (rc) 5495 goto free_userptr; 5496 5497 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5498 parser->patched_cb_size, false, false, 5499 &handle); 5500 if (rc) { 5501 dev_err(hdev->dev, 5502 "Failed to allocate patched CB for DMA CS %d\n", rc); 5503 goto free_userptr; 5504 } 5505 5506 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5507 /* hl_cb_get should never fail here */ 5508 if (!parser->patched_cb) { 5509 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5510 rc = -EFAULT; 5511 goto out; 5512 } 5513 5514 rc = gaudi_patch_cb(hdev, parser); 5515 5516 if (rc) 5517 hl_cb_put(parser->patched_cb); 5518 5519 out: 5520 /* 5521 * Always call cb destroy here because we still have 1 reference 5522 * to it by calling cb_get earlier. 
After the job will be completed, 5523 * cb_put will release it, but here we want to remove it from the 5524 * idr 5525 */ 5526 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5527 5528 free_userptr: 5529 if (rc) 5530 hl_userptr_delete_list(hdev, parser->job_userptr_list); 5531 return rc; 5532 } 5533 5534 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 5535 struct hl_cs_parser *parser) 5536 { 5537 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5538 struct gaudi_device *gaudi = hdev->asic_specific; 5539 u32 nic_queue_offset, nic_mask_q_id; 5540 5541 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) && 5542 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) { 5543 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0; 5544 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2)); 5545 5546 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) { 5547 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5548 return -EINVAL; 5549 } 5550 } 5551 5552 /* For internal queue jobs just check if CB address is valid */ 5553 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5554 parser->user_cb_size, 5555 asic_prop->sram_user_base_address, 5556 asic_prop->sram_end_address)) 5557 return 0; 5558 5559 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5560 parser->user_cb_size, 5561 asic_prop->dram_user_base_address, 5562 asic_prop->dram_end_address)) 5563 return 0; 5564 5565 /* PMMU and HPMMU addresses are equal, check only one of them */ 5566 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5567 parser->user_cb_size, 5568 asic_prop->pmmu.start_addr, 5569 asic_prop->pmmu.end_addr)) 5570 return 0; 5571 5572 dev_err(hdev->dev, 5573 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 5574 parser->user_cb, parser->user_cb_size); 5575 5576 return -EFAULT; 5577 } 5578 5579 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5580 { 5581 struct gaudi_device *gaudi = hdev->asic_specific; 5582 5583 if (parser->queue_type == QUEUE_TYPE_INT) 5584 return gaudi_parse_cb_no_ext_queue(hdev, parser); 5585 5586 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 5587 return gaudi_parse_cb_mmu(hdev, parser); 5588 else 5589 return gaudi_parse_cb_no_mmu(hdev, parser); 5590 } 5591 5592 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, 5593 u32 len, u32 original_len, u64 cq_addr, u32 cq_val, 5594 u32 msi_vec, bool eb) 5595 { 5596 struct gaudi_device *gaudi = hdev->asic_specific; 5597 struct packet_msg_prot *cq_pkt; 5598 struct packet_nop *cq_padding; 5599 u64 msi_addr; 5600 u32 tmp; 5601 5602 cq_padding = kernel_address + original_len; 5603 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 5604 5605 while ((void *)cq_padding < (void *)cq_pkt) { 5606 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP)); 5607 cq_padding++; 5608 } 5609 5610 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5611 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5612 5613 if (eb) 5614 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5615 5616 cq_pkt->ctl = cpu_to_le32(tmp); 5617 cq_pkt->value = cpu_to_le32(cq_val); 5618 cq_pkt->addr = cpu_to_le64(cq_addr); 5619 5620 cq_pkt++; 5621 5622 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5623 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5624 cq_pkt->ctl = cpu_to_le32(tmp); 5625 cq_pkt->value = cpu_to_le32(1); 5626 5627 if (gaudi->multi_msi_mode) 5628 msi_addr = mmPCIE_MSI_INTR_0 
+ msi_vec * 4; 5629 else 5630 msi_addr = mmPCIE_CORE_MSI_REQ; 5631 5632 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); 5633 } 5634 5635 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 5636 { 5637 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 5638 } 5639 5640 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 5641 u32 size, u64 val) 5642 { 5643 struct packet_lin_dma *lin_dma_pkt; 5644 struct hl_cs_job *job; 5645 u32 cb_size, ctl, err_cause; 5646 struct hl_cb *cb; 5647 int rc; 5648 5649 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 5650 if (!cb) 5651 return -EFAULT; 5652 5653 lin_dma_pkt = cb->kernel_address; 5654 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 5655 cb_size = sizeof(*lin_dma_pkt); 5656 5657 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 5658 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 5659 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 5660 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5661 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5662 5663 lin_dma_pkt->ctl = cpu_to_le32(ctl); 5664 lin_dma_pkt->src_addr = cpu_to_le64(val); 5665 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 5666 lin_dma_pkt->tsize = cpu_to_le32(size); 5667 5668 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5669 if (!job) { 5670 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5671 rc = -ENOMEM; 5672 goto release_cb; 5673 } 5674 5675 /* Verify DMA is OK */ 5676 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5677 if (err_cause && !hdev->init_done) { 5678 dev_dbg(hdev->dev, 5679 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5680 err_cause); 5681 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5682 } 5683 5684 job->id = 0; 5685 job->user_cb = cb; 5686 atomic_inc(&job->user_cb->cs_cnt); 5687 job->user_cb_size = cb_size; 5688 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5689 job->patched_cb = job->user_cb; 5690 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 5691 5692 hl_debugfs_add_job(hdev, job); 5693 5694 rc = gaudi_send_job_on_qman0(hdev, job); 5695 hl_debugfs_remove_job(hdev, job); 5696 kfree(job); 5697 atomic_dec(&cb->cs_cnt); 5698 5699 /* Verify DMA is OK */ 5700 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5701 if (err_cause) { 5702 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5703 rc = -EIO; 5704 if (!hdev->init_done) { 5705 dev_dbg(hdev->dev, 5706 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5707 err_cause); 5708 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5709 } 5710 } 5711 5712 release_cb: 5713 hl_cb_put(cb); 5714 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5715 5716 return rc; 5717 } 5718 5719 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 5720 u32 num_regs, u32 val) 5721 { 5722 struct packet_msg_long *pkt; 5723 struct hl_cs_job *job; 5724 u32 cb_size, ctl; 5725 struct hl_cb *cb; 5726 int i, rc; 5727 5728 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot); 5729 5730 if (cb_size > SZ_2M) { 5731 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M); 5732 return -ENOMEM; 5733 } 5734 5735 cb = hl_cb_kernel_create(hdev, cb_size, false); 5736 if (!cb) 5737 return -EFAULT; 5738 5739 pkt = cb->kernel_address; 5740 5741 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ 5742 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); 5743 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5744 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5745 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5746 5747 for (i = 0; i < num_regs ; 
i++, pkt++) { 5748 pkt->ctl = cpu_to_le32(ctl); 5749 pkt->value = cpu_to_le32(val); 5750 pkt->addr = cpu_to_le64(reg_base + (i * 4)); 5751 } 5752 5753 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5754 if (!job) { 5755 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5756 rc = -ENOMEM; 5757 goto release_cb; 5758 } 5759 5760 job->id = 0; 5761 job->user_cb = cb; 5762 atomic_inc(&job->user_cb->cs_cnt); 5763 job->user_cb_size = cb_size; 5764 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5765 job->patched_cb = job->user_cb; 5766 job->job_cb_size = cb_size; 5767 5768 hl_debugfs_add_job(hdev, job); 5769 5770 rc = gaudi_send_job_on_qman0(hdev, job); 5771 hl_debugfs_remove_job(hdev, job); 5772 kfree(job); 5773 atomic_dec(&cb->cs_cnt); 5774 5775 release_cb: 5776 hl_cb_put(cb); 5777 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5778 5779 return rc; 5780 } 5781 5782 static int gaudi_restore_sm_registers(struct hl_device *hdev) 5783 { 5784 u64 base_addr; 5785 u32 num_regs; 5786 int rc; 5787 5788 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5789 num_regs = NUM_OF_SOB_IN_BLOCK; 5790 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5791 if (rc) { 5792 dev_err(hdev->dev, "failed resetting SM registers"); 5793 return -ENOMEM; 5794 } 5795 5796 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0; 5797 num_regs = NUM_OF_SOB_IN_BLOCK; 5798 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5799 if (rc) { 5800 dev_err(hdev->dev, "failed resetting SM registers"); 5801 return -ENOMEM; 5802 } 5803 5804 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5805 num_regs = NUM_OF_SOB_IN_BLOCK; 5806 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5807 if (rc) { 5808 dev_err(hdev->dev, "failed resetting SM registers"); 5809 return -ENOMEM; 5810 } 5811 5812 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5813 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5814 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5815 if (rc) { 5816 dev_err(hdev->dev, "failed resetting SM registers"); 5817 return -ENOMEM; 5818 } 5819 5820 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0; 5821 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5822 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5823 if (rc) { 5824 dev_err(hdev->dev, "failed resetting SM registers"); 5825 return -ENOMEM; 5826 } 5827 5828 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5829 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5830 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5831 if (rc) { 5832 dev_err(hdev->dev, "failed resetting SM registers"); 5833 return -ENOMEM; 5834 } 5835 5836 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5837 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4); 5838 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT; 5839 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5840 if (rc) { 5841 dev_err(hdev->dev, "failed resetting SM registers"); 5842 return -ENOMEM; 5843 } 5844 5845 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + 5846 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4); 5847 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR; 5848 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5849 if (rc) { 5850 dev_err(hdev->dev, "failed resetting SM registers"); 5851 return -ENOMEM; 5852 } 5853 5854 return 0; 5855 } 5856 5857 static void gaudi_restore_dma_registers(struct hl_device *hdev) 5858 { 5859 u32 
sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 5860 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5861 int i; 5862 5863 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5864 u64 sob_addr = CFG_BASE + 5865 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5866 (i * sob_delta); 5867 u32 dma_offset = i * DMA_CORE_OFFSET; 5868 5869 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, 5870 lower_32_bits(sob_addr)); 5871 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 5872 upper_32_bits(sob_addr)); 5873 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 5874 5875 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 5876 * modified by the user for SRAM reduction 5877 */ 5878 if (i > 1) 5879 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 5880 0x00000001); 5881 } 5882 } 5883 5884 static void gaudi_restore_qm_registers(struct hl_device *hdev) 5885 { 5886 u32 qman_offset; 5887 int i; 5888 5889 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5890 qman_offset = i * DMA_QMAN_OFFSET; 5891 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 5892 } 5893 5894 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 5895 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 5896 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 5897 } 5898 5899 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 5900 qman_offset = i * TPC_QMAN_OFFSET; 5901 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 5902 } 5903 5904 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 5905 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET + 5906 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET; 5907 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0); 5908 } 5909 } 5910 5911 static int gaudi_restore_user_registers(struct hl_device *hdev) 5912 { 5913 int rc; 5914 5915 rc = gaudi_restore_sm_registers(hdev); 5916 if (rc) 5917 return rc; 5918 5919 gaudi_restore_dma_registers(hdev); 5920 gaudi_restore_qm_registers(hdev); 5921 5922 return 0; 5923 } 5924 5925 static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 5926 { 5927 return 0; 5928 } 5929 5930 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 5931 { 5932 u32 size = hdev->asic_prop.mmu_pgt_size + 5933 hdev->asic_prop.mmu_cache_mng_size; 5934 struct gaudi_device *gaudi = hdev->asic_specific; 5935 u64 addr = hdev->asic_prop.mmu_pgt_addr; 5936 5937 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 5938 return 0; 5939 5940 return gaudi_memset_device_memory(hdev, addr, size, 0); 5941 } 5942 5943 static void gaudi_restore_phase_topology(struct hl_device *hdev) 5944 { 5945 5946 } 5947 5948 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, 5949 u32 size_to_dma, dma_addr_t dma_addr) 5950 { 5951 u32 err_cause, val; 5952 u64 dma_offset; 5953 int rc; 5954 5955 dma_offset = dma_id * DMA_CORE_OFFSET; 5956 5957 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); 5958 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); 5959 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); 5960 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); 5961 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); 5962 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 5963 (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); 5964 5965 rc = hl_poll_timeout( 5966 hdev, 5967 mmDMA0_CORE_STS0 + dma_offset, 5968 val, 5969 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), 5970 0, 5971 1000000); 5972 5973 if (rc) { 5974 dev_err(hdev->dev, 5975 "DMA %d timed-out during reading of 0x%llx\n", 5976 dma_id, addr); 5977 return -EIO; 5978 } 5979 5980 /* Verify DMA is OK */ 5981 err_cause 
= RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5982 if (err_cause) { 5983 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5984 dev_dbg(hdev->dev, 5985 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5986 err_cause); 5987 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5988 5989 return -EIO; 5990 } 5991 5992 return 0; 5993 } 5994 5995 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, 5996 void *blob_addr) 5997 { 5998 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; 5999 u32 qm_glbl_sts0, qm_cgm_sts; 6000 u64 dma_offset, qm_offset; 6001 dma_addr_t dma_addr; 6002 void *kernel_addr; 6003 bool is_eng_idle; 6004 int rc = 0, dma_id; 6005 6006 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO); 6007 6008 if (!kernel_addr) 6009 return -ENOMEM; 6010 6011 hdev->asic_funcs->hw_queues_lock(hdev); 6012 6013 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 6014 dma_offset = dma_id * DMA_CORE_OFFSET; 6015 qm_offset = dma_id * DMA_QMAN_OFFSET; 6016 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 6017 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 6018 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 6019 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 6020 IS_DMA_IDLE(dma_core_sts0); 6021 6022 if (!is_eng_idle) { 6023 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 6024 dma_offset = dma_id * DMA_CORE_OFFSET; 6025 qm_offset = dma_id * DMA_QMAN_OFFSET; 6026 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 6027 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 6028 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 6029 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 6030 IS_DMA_IDLE(dma_core_sts0); 6031 6032 if (!is_eng_idle) { 6033 dev_err_ratelimited(hdev->dev, 6034 "Can't read via DMA because it is BUSY\n"); 6035 rc = -EAGAIN; 6036 goto out; 6037 } 6038 } 6039 6040 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); 6041 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, 6042 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 6043 6044 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6045 * using the compute ctx ASID, if exists. If not, use the kernel ctx 6046 * ASID 6047 */ 6048 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6049 6050 /* Verify DMA is OK */ 6051 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6052 if (err_cause) { 6053 dev_dbg(hdev->dev, 6054 "Clearing DMA0 engine from errors (cause 0x%x)\n", 6055 err_cause); 6056 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 6057 } 6058 6059 pos = 0; 6060 size_left = size; 6061 size_to_dma = SZ_2M; 6062 6063 while (size_left > 0) { 6064 6065 if (size_left < SZ_2M) 6066 size_to_dma = size_left; 6067 6068 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, 6069 dma_addr); 6070 if (rc) 6071 break; 6072 6073 memcpy(blob_addr + pos, kernel_addr, size_to_dma); 6074 6075 if (size_left <= SZ_2M) 6076 break; 6077 6078 pos += SZ_2M; 6079 addr += SZ_2M; 6080 size_left -= SZ_2M; 6081 } 6082 6083 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6084 * using the compute ctx ASID, if exists. 
If not, use the kernel ctx 6085 * ASID 6086 */ 6087 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 6088 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6089 6090 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); 6091 6092 out: 6093 hdev->asic_funcs->hw_queues_unlock(hdev); 6094 6095 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); 6096 6097 return rc; 6098 } 6099 6100 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 6101 { 6102 struct gaudi_device *gaudi = hdev->asic_specific; 6103 6104 if (hdev->reset_info.hard_reset_pending) 6105 return U64_MAX; 6106 6107 return readq(hdev->pcie_bar[HBM_BAR_ID] + 6108 (addr - gaudi->hbm_bar_cur_addr)); 6109 } 6110 6111 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 6112 { 6113 struct gaudi_device *gaudi = hdev->asic_specific; 6114 6115 if (hdev->reset_info.hard_reset_pending) 6116 return; 6117 6118 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 6119 (addr - gaudi->hbm_bar_cur_addr)); 6120 } 6121 6122 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 6123 { 6124 /* mask to zero the MMBP and ASID bits */ 6125 WREG32_AND(reg, ~0x7FF); 6126 WREG32_OR(reg, asid); 6127 } 6128 6129 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 6130 { 6131 struct gaudi_device *gaudi = hdev->asic_specific; 6132 6133 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6134 return; 6135 6136 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 6137 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6138 return; 6139 } 6140 6141 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6142 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6143 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6144 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6145 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6146 6147 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6148 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6149 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6150 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6151 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6152 6153 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6154 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6155 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6156 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6157 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6158 6159 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6160 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6161 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6162 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6163 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6164 6165 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6166 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6167 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6168 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6169 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6170 6171 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6172 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, 
asid); 6173 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6174 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6175 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6176 6177 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6178 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6179 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6180 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6181 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6182 6183 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6184 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6185 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6186 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6187 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6188 6189 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 6190 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 6191 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 6192 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 6193 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 6194 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 6195 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 6196 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 6197 6198 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6199 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6200 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6201 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6202 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6203 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 6204 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 6205 6206 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6207 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6208 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6209 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6210 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6211 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 6212 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 6213 6214 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6215 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6216 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6217 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6218 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6219 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 6220 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 6221 6222 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6223 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6224 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6225 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6226 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6227 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 6228 gaudi_mmu_prepare_reg(hdev, 
mmTPC3_CFG_AWUSER_LO, asid); 6229 6230 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6231 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6232 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6233 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6234 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6235 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 6236 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 6237 6238 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6239 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6240 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6241 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6242 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6243 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 6244 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 6245 6246 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6247 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6248 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6249 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6250 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6251 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 6252 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 6253 6254 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6255 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6256 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6257 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6258 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6259 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 6260 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 6261 6262 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6263 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6264 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6265 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6266 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6267 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6268 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6269 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6270 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6271 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6272 6273 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); 6274 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 6275 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 6276 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 6277 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 6278 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 6279 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 6280 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 6281 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 6282 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 6283 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 6284 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 6285 6286 if 
(gaudi->hw_cap_initialized & HW_CAP_NIC0) { 6287 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, 6288 asid); 6289 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, 6290 asid); 6291 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, 6292 asid); 6293 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, 6294 asid); 6295 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, 6296 asid); 6297 } 6298 6299 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) { 6300 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, 6301 asid); 6302 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, 6303 asid); 6304 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, 6305 asid); 6306 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, 6307 asid); 6308 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, 6309 asid); 6310 } 6311 6312 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) { 6313 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, 6314 asid); 6315 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, 6316 asid); 6317 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, 6318 asid); 6319 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, 6320 asid); 6321 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, 6322 asid); 6323 } 6324 6325 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) { 6326 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, 6327 asid); 6328 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, 6329 asid); 6330 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, 6331 asid); 6332 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, 6333 asid); 6334 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, 6335 asid); 6336 } 6337 6338 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) { 6339 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, 6340 asid); 6341 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, 6342 asid); 6343 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, 6344 asid); 6345 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, 6346 asid); 6347 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, 6348 asid); 6349 } 6350 6351 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) { 6352 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, 6353 asid); 6354 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, 6355 asid); 6356 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, 6357 asid); 6358 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, 6359 asid); 6360 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, 6361 asid); 6362 } 6363 6364 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) { 6365 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, 6366 asid); 6367 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, 6368 asid); 6369 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, 6370 asid); 6371 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, 6372 asid); 6373 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, 6374 asid); 6375 } 6376 6377 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) { 6378 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, 6379 asid); 6380 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, 6381 asid); 6382 gaudi_mmu_prepare_reg(hdev, 
mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, 6383 asid); 6384 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, 6385 asid); 6386 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, 6387 asid); 6388 } 6389 6390 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) { 6391 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, 6392 asid); 6393 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, 6394 asid); 6395 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, 6396 asid); 6397 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, 6398 asid); 6399 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, 6400 asid); 6401 } 6402 6403 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) { 6404 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, 6405 asid); 6406 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, 6407 asid); 6408 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, 6409 asid); 6410 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, 6411 asid); 6412 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, 6413 asid); 6414 } 6415 6416 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 6417 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 6418 } 6419 6420 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 6421 struct hl_cs_job *job) 6422 { 6423 struct packet_msg_prot *fence_pkt; 6424 u32 *fence_ptr; 6425 dma_addr_t fence_dma_addr; 6426 struct hl_cb *cb; 6427 u32 tmp, timeout, dma_offset; 6428 int rc; 6429 6430 if (hdev->pldm) 6431 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 6432 else 6433 timeout = HL_DEVICE_TIMEOUT_USEC; 6434 6435 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 6436 dev_err_ratelimited(hdev->dev, 6437 "Can't send driver job on QMAN0 because the device is not idle\n"); 6438 return -EBUSY; 6439 } 6440 6441 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 6442 if (!fence_ptr) { 6443 dev_err(hdev->dev, 6444 "Failed to allocate fence memory for QMAN0\n"); 6445 return -ENOMEM; 6446 } 6447 6448 cb = job->patched_cb; 6449 6450 fence_pkt = cb->kernel_address + 6451 job->job_cb_size - sizeof(struct packet_msg_prot); 6452 6453 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 6454 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 6455 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 6456 6457 fence_pkt->ctl = cpu_to_le32(tmp); 6458 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 6459 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 6460 6461 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 6462 6463 WREG32(mmDMA0_CORE_PROT + dma_offset, 6464 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6465 6466 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 6467 job->job_cb_size, cb->bus_address); 6468 if (rc) { 6469 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 6470 goto free_fence_ptr; 6471 } 6472 6473 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 6474 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 6475 timeout, true); 6476 6477 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 6478 6479 if (rc == -ETIMEDOUT) { 6480 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 6481 goto free_fence_ptr; 6482 } 6483 6484 free_fence_ptr: 6485 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); 6486 6487 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 6488 return rc; 6489 } 6490 6491 static void 
gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 6492 { 6493 if (event_type >= GAUDI_EVENT_SIZE) 6494 goto event_not_supported; 6495 6496 if (!gaudi_irq_map_table[event_type].valid) 6497 goto event_not_supported; 6498 6499 snprintf(desc, size, gaudi_irq_map_table[event_type].name); 6500 6501 return; 6502 6503 event_not_supported: 6504 snprintf(desc, size, "N/A"); 6505 } 6506 6507 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, 6508 bool is_write, u16 *engine_id_1, 6509 u16 *engine_id_2) 6510 { 6511 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 6512 6513 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 6514 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 6515 6516 switch (x_y) { 6517 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6518 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6519 dma_id[0] = 0; 6520 dma_id[1] = 2; 6521 break; 6522 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6523 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6524 dma_id[0] = 1; 6525 dma_id[1] = 3; 6526 break; 6527 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6528 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6529 dma_id[0] = 4; 6530 dma_id[1] = 6; 6531 break; 6532 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6533 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6534 dma_id[0] = 5; 6535 dma_id[1] = 7; 6536 break; 6537 default: 6538 goto unknown_initiator; 6539 } 6540 6541 for (i = 0 ; i < 2 ; i++) { 6542 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 6543 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6544 } 6545 6546 switch (x_y) { 6547 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6548 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6549 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6550 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6551 return "DMA0"; 6552 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6553 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; 6554 return "DMA2"; 6555 } else { 6556 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6557 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; 6558 return "DMA0 or DMA2"; 6559 } 6560 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6561 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6562 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6563 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6564 return "DMA1"; 6565 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6566 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; 6567 return "DMA3"; 6568 } else { 6569 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6570 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; 6571 return "DMA1 or DMA3"; 6572 } 6573 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6574 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6575 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6576 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6577 return "DMA4"; 6578 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6579 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; 6580 return "DMA6"; 6581 } else { 6582 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6583 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; 6584 return "DMA4 or DMA6"; 6585 } 6586 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6587 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6588 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6589 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6590 return "DMA5"; 6591 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6592 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; 6593 return "DMA7"; 6594 } else { 6595 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6596 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; 6597 return "DMA5 or DMA7"; 6598 } 6599 } 6600 6601 unknown_initiator: 6602 return "unknown initiator"; 
6603 } 6604 6605 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, 6606 u16 *engine_id_1, u16 *engine_id_2) 6607 { 6608 u32 val, x_y, axi_id; 6609 6610 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 6611 RREG32(mmMMU_UP_RAZWI_READ_ID); 6612 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 6613 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 6614 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 6615 RAZWI_INITIATOR_AXI_ID_SHIFT); 6616 6617 switch (x_y) { 6618 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 6619 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6620 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; 6621 return "TPC0"; 6622 } 6623 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6624 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; 6625 return "NIC0"; 6626 } 6627 break; 6628 case RAZWI_INITIATOR_ID_X_Y_TPC1: 6629 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; 6630 return "TPC1"; 6631 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 6632 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 6633 *engine_id_1 = GAUDI_ENGINE_ID_MME_0; 6634 return "MME0"; 6635 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 6636 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 6637 *engine_id_1 = GAUDI_ENGINE_ID_MME_1; 6638 return "MME1"; 6639 case RAZWI_INITIATOR_ID_X_Y_TPC2: 6640 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; 6641 return "TPC2"; 6642 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 6643 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6644 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; 6645 return "TPC3"; 6646 } 6647 /* PCI, CPU or PSOC does not have engine id*/ 6648 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 6649 return "PCI"; 6650 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 6651 return "CPU"; 6652 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 6653 return "PSOC"; 6654 break; 6655 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6656 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6657 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6658 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6659 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6660 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6661 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6662 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6663 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, 6664 engine_id_1, engine_id_2); 6665 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 6666 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6667 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; 6668 return "TPC4"; 6669 } 6670 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6671 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; 6672 return "NIC1"; 6673 } 6674 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6675 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; 6676 return "NIC2"; 6677 } 6678 break; 6679 case RAZWI_INITIATOR_ID_X_Y_TPC5: 6680 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; 6681 return "TPC5"; 6682 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 6683 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 6684 *engine_id_1 = GAUDI_ENGINE_ID_MME_2; 6685 return "MME2"; 6686 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 6687 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 6688 *engine_id_1 = GAUDI_ENGINE_ID_MME_3; 6689 return "MME3"; 6690 case RAZWI_INITIATOR_ID_X_Y_TPC6: 6691 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; 6692 return "TPC6"; 6693 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 6694 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6695 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; 6696 return "TPC7"; 6697 } 6698 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6699 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; 6700 return "NIC4"; 6701 } 6702 if 
(axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6703 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; 6704 return "NIC5"; 6705 } 6706 break; 6707 default: 6708 break; 6709 } 6710 6711 dev_err(hdev->dev, 6712 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 6713 val, 6714 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 6715 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 6716 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 6717 RAZWI_INITIATOR_AXI_ID_MASK); 6718 6719 return "unknown initiator"; 6720 } 6721 6722 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1, 6723 u16 *engine_id_2, bool *is_read, bool *is_write) 6724 { 6725 6726 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 6727 dev_err_ratelimited(hdev->dev, 6728 "RAZWI event caused by illegal write of %s\n", 6729 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); 6730 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 6731 *is_write = true; 6732 } 6733 6734 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 6735 dev_err_ratelimited(hdev->dev, 6736 "RAZWI event caused by illegal read of %s\n", 6737 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); 6738 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 6739 *is_read = true; 6740 } 6741 } 6742 6743 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) 6744 { 6745 struct gaudi_device *gaudi = hdev->asic_specific; 6746 u32 val; 6747 6748 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6749 return; 6750 6751 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 6752 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6753 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 6754 *addr <<= 32; 6755 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 6756 6757 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); 6758 hl_handle_page_fault(hdev, *addr, 0, true, event_mask); 6759 6760 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 6761 } 6762 6763 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 6764 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6765 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 6766 *addr <<= 32; 6767 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 6768 6769 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); 6770 6771 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 6772 } 6773 } 6774 6775 /* 6776 * +-------------------+------------------------------------------------------+ 6777 * | Configuration Reg | Description | 6778 * | Address | | 6779 * +-------------------+------------------------------------------------------+ 6780 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| 6781 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 6782 * | |0xF34 memory wrappers 63:32 | 6783 * | |0xF38 memory wrappers 95:64 | 6784 * | |0xF3C memory wrappers 127:96 | 6785 * +-------------------+------------------------------------------------------+ 6786 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 6787 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 6788 * | |0xF44 memory wrappers 63:32 | 6789 * | |0xF48 memory wrappers 95:64 | 6790 * | |0xF4C memory wrappers 127:96 | 6791 * +-------------------+------------------------------------------------------+ 6792 */ 6793 static int gaudi_extract_ecc_info(struct hl_device *hdev, 6794 struct ecc_info_extract_params *params, u64 *ecc_address, 6795 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 6796 { 6797 u32 i, num_mem_regs, reg, err_bit; 6798 u64 err_addr, err_word = 0; 6799 
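/*
 * Each SERR/DERR indication register covers 32 memory wrappers (see
 * the register map in the comment above), so the number of registers
 * to scan is num_memories divided by 32, rounded up. For example, a
 * block that reports 90 memory wrappers (as the TPC blocks below do)
 * spans three indication registers: wrappers 0-31, 32-63 and 64-89.
 */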
6800 num_mem_regs = params->num_memories / 32 + 6801 ((params->num_memories % 32) ? 1 : 0); 6802 6803 if (params->block_address >= CFG_BASE) 6804 params->block_address -= CFG_BASE; 6805 6806 if (params->derr) 6807 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 6808 else 6809 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 6810 6811 /* Set invalid wrapper index */ 6812 *memory_wrapper_idx = 0xFF; 6813 6814 /* Iterate through memory wrappers, a single bit must be set */ 6815 for (i = 0 ; i < num_mem_regs ; i++) { 6816 err_addr += i * 4; 6817 err_word = RREG32(err_addr); 6818 if (err_word) { 6819 err_bit = __ffs(err_word); 6820 *memory_wrapper_idx = err_bit + (32 * i); 6821 break; 6822 } 6823 } 6824 6825 if (*memory_wrapper_idx == 0xFF) { 6826 dev_err(hdev->dev, "ECC error information cannot be found\n"); 6827 return -EINVAL; 6828 } 6829 6830 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 6831 *memory_wrapper_idx); 6832 6833 *ecc_address = 6834 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 6835 *ecc_syndrom = 6836 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 6837 6838 /* Clear error indication */ 6839 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 6840 if (params->derr) 6841 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 6842 else 6843 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 6844 6845 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 6846 6847 return 0; 6848 } 6849 6850 /* 6851 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6852 * 6853 * @idx: the current pi/ci value 6854 * @q_len: the queue length (power of 2) 6855 * 6856 * @return the cyclically decremented index 6857 */ 6858 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) 6859 { 6860 u32 mask = q_len - 1; 6861 6862 /* 6863 * modular decrement is equivalent to adding (queue_size -1) 6864 * later we take LSBs to make sure the value is in the 6865 * range [0, queue_len - 1] 6866 */ 6867 return (idx + q_len - 1) & mask; 6868 } 6869 6870 /** 6871 * gaudi_handle_sw_config_stream_data - print SW config stream data 6872 * 6873 * @hdev: pointer to the habanalabs device structure 6874 * @stream: the QMAN's stream 6875 * @qman_base: base address of QMAN registers block 6876 * @event_mask: mask of the last events occurred 6877 */ 6878 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream, 6879 u64 qman_base, u64 event_mask) 6880 { 6881 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6882 u32 cq_ptr_lo_off, size; 6883 6884 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0; 6885 6886 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 6887 stream * cq_ptr_lo_off; 6888 cq_ptr_hi = cq_ptr_lo + 6889 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0); 6890 cq_tsize = cq_ptr_lo + 6891 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0); 6892 6893 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6894 size = RREG32(cq_tsize); 6895 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", 6896 stream, cq_ptr, size); 6897 6898 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6899 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; 6900 hdev->captured_err_info.undef_opcode.cq_size = size; 6901 hdev->captured_err_info.undef_opcode.stream_id = stream; 6902 } 6903 } 6904 6905 /** 6906 * gaudi_handle_last_pqes_on_err - print last PQEs on error 6907 * 6908 * @hdev: pointer to the habanalabs device structure 6909 * @qid_base: first QID 
of the QMAN (out of 4 streams) 6910 * @stream: the QMAN's stream 6911 * @qman_base: base address of QMAN registers block 6912 * @event_mask: mask of the last events occurred 6913 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 6914 */ 6915 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, 6916 u32 stream, u64 qman_base, 6917 u64 event_mask, 6918 bool pr_sw_conf) 6919 { 6920 u32 ci, qm_ci_stream_off, queue_len; 6921 struct hl_hw_queue *q; 6922 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE]; 6923 int i; 6924 6925 q = &hdev->kernel_queues[qid_base + stream]; 6926 6927 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0; 6928 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) + 6929 stream * qm_ci_stream_off; 6930 6931 queue_len = (q->queue_type == QUEUE_TYPE_INT) ? 6932 q->int_queue_len : HL_QUEUE_LENGTH; 6933 6934 hdev->asic_funcs->hw_queues_lock(hdev); 6935 6936 if (pr_sw_conf) 6937 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6938 6939 ci = RREG32(pq_ci); 6940 6941 /* we should start printing from ci - 1 */ 6942 ci = gaudi_queue_idx_dec(ci, queue_len); 6943 memset(addr, 0, sizeof(addr)); 6944 6945 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6946 struct hl_bd *bd; 6947 u32 len; 6948 6949 bd = q->kernel_address; 6950 bd += ci; 6951 6952 len = le32_to_cpu(bd->len); 6953 /* len 0 means uninitialized entry - break */ 6954 if (!len) 6955 break; 6956 6957 addr[i] = le64_to_cpu(bd->ptr); 6958 6959 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", 6960 stream, ci, addr[i], len); 6961 6962 /* get previous ci, wrap if needed */ 6963 ci = gaudi_queue_idx_dec(ci, queue_len); 6964 } 6965 6966 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6967 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; 6968 u32 arr_idx = undef_opcode->cb_addr_streams_len; 6969 6970 if (arr_idx == 0) { 6971 undef_opcode->timestamp = ktime_get(); 6972 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base]; 6973 } 6974 6975 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr)); 6976 undef_opcode->cb_addr_streams_len++; 6977 } 6978 6979 hdev->asic_funcs->hw_queues_unlock(hdev); 6980 } 6981 6982 /** 6983 * handle_qman_data_on_err - extract QMAN data on error 6984 * 6985 * @hdev: pointer to the habanalabs device structure 6986 * @qid_base: first QID of the QMAN (out of 4 streams) 6987 * @stream: the QMAN's stream 6988 * @qman_base: base address of QMAN registers block 6989 * @event_mask: mask of the last events occurred 6990 * 6991 * This function attempts to extract as much data as possible on QMAN error. 6992 * On upper CP print the SW config stream data and last 8 PQEs.
6993 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 6994 */ 6995 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base, 6996 u32 stream, u64 qman_base, u64 event_mask) 6997 { 6998 u32 i; 6999 7000 if (stream != QMAN_STREAMS) { 7001 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, 7002 qman_base, event_mask, true); 7003 return; 7004 } 7005 7006 /* handle Lower-CP */ 7007 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 7008 7009 for (i = 0; i < QMAN_STREAMS; i++) 7010 gaudi_handle_last_pqes_on_err(hdev, qid_base, i, 7011 qman_base, event_mask, false); 7012 } 7013 7014 static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 7015 const char *qm_name, 7016 u64 qman_base, 7017 u32 qid_base, 7018 u64 *event_mask) 7019 { 7020 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 7021 u64 glbl_sts_addr, arb_err_addr; 7022 char reg_desc[32]; 7023 7024 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE); 7025 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE); 7026 7027 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 7028 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 7029 glbl_sts_clr_val = 0; 7030 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 7031 7032 if (!glbl_sts_val) 7033 continue; 7034 7035 if (i == QMAN_STREAMS) 7036 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 7037 else 7038 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 7039 7040 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 7041 if (glbl_sts_val & BIT(j)) { 7042 dev_err_ratelimited(hdev->dev, 7043 "%s %s. err cause: %s\n", 7044 qm_name, reg_desc, 7045 gaudi_qman_error_cause[j]); 7046 glbl_sts_clr_val |= BIT(j); 7047 } 7048 } 7049 /* check for undefined opcode */ 7050 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && 7051 hdev->captured_err_info.undef_opcode.write_enable) { 7052 memset(&hdev->captured_err_info.undef_opcode, 0, 7053 sizeof(hdev->captured_err_info.undef_opcode)); 7054 7055 hdev->captured_err_info.undef_opcode.write_enable = false; 7056 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; 7057 } 7058 7059 /* Write 1 clear errors */ 7060 if (!hdev->stop_on_err) 7061 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 7062 else 7063 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask); 7064 } 7065 7066 arb_err_val = RREG32(arb_err_addr); 7067 7068 if (!arb_err_val) 7069 return; 7070 7071 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 7072 if (arb_err_val & BIT(j)) { 7073 dev_err_ratelimited(hdev->dev, 7074 "%s ARB_ERR. 
err cause: %s\n", 7075 qm_name, 7076 gaudi_qman_arb_error_cause[j]); 7077 } 7078 } 7079 } 7080 7081 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, 7082 struct hl_eq_sm_sei_data *sei_data) 7083 { 7084 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; 7085 7086 /* Flip the bits as the enum is ordered in the opposite way */ 7087 index = (index ^ 0x3) & 0x3; 7088 7089 switch (sei_data->sei_cause) { 7090 case SM_SEI_SO_OVERFLOW: 7091 dev_err_ratelimited(hdev->dev, 7092 "%s SEI Error: SOB Group %u overflow/underflow", 7093 gaudi_sync_manager_names[index], 7094 le32_to_cpu(sei_data->sei_log)); 7095 break; 7096 case SM_SEI_LBW_4B_UNALIGNED: 7097 dev_err_ratelimited(hdev->dev, 7098 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", 7099 gaudi_sync_manager_names[index], 7100 le32_to_cpu(sei_data->sei_log)); 7101 break; 7102 case SM_SEI_AXI_RESPONSE_ERR: 7103 dev_err_ratelimited(hdev->dev, 7104 "%s SEI Error: AXI ID %u response error", 7105 gaudi_sync_manager_names[index], 7106 le32_to_cpu(sei_data->sei_log)); 7107 break; 7108 default: 7109 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", 7110 le32_to_cpu(sei_data->sei_log)); 7111 break; 7112 } 7113 } 7114 7115 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7116 struct hl_eq_ecc_data *ecc_data) 7117 { 7118 struct ecc_info_extract_params params; 7119 u64 ecc_address = 0, ecc_syndrom = 0; 7120 u8 index, memory_wrapper_idx = 0; 7121 bool extract_info_from_fw; 7122 int rc; 7123 7124 if (hdev->asic_prop.fw_security_enabled) { 7125 extract_info_from_fw = true; 7126 goto extract_ecc_info; 7127 } 7128 7129 switch (event_type) { 7130 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 7131 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: 7132 extract_info_from_fw = true; 7133 break; 7134 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7135 index = event_type - GAUDI_EVENT_TPC0_SERR; 7136 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7137 params.num_memories = 90; 7138 params.derr = false; 7139 extract_info_from_fw = false; 7140 break; 7141 case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: 7142 index = event_type - GAUDI_EVENT_TPC0_DERR; 7143 params.block_address = 7144 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7145 params.num_memories = 90; 7146 params.derr = true; 7147 extract_info_from_fw = false; 7148 break; 7149 case GAUDI_EVENT_MME0_ACC_SERR: 7150 case GAUDI_EVENT_MME1_ACC_SERR: 7151 case GAUDI_EVENT_MME2_ACC_SERR: 7152 case GAUDI_EVENT_MME3_ACC_SERR: 7153 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; 7154 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7155 params.num_memories = 128; 7156 params.derr = false; 7157 extract_info_from_fw = false; 7158 break; 7159 case GAUDI_EVENT_MME0_ACC_DERR: 7160 case GAUDI_EVENT_MME1_ACC_DERR: 7161 case GAUDI_EVENT_MME2_ACC_DERR: 7162 case GAUDI_EVENT_MME3_ACC_DERR: 7163 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; 7164 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7165 params.num_memories = 128; 7166 params.derr = true; 7167 extract_info_from_fw = false; 7168 break; 7169 case GAUDI_EVENT_MME0_SBAB_SERR: 7170 case GAUDI_EVENT_MME1_SBAB_SERR: 7171 case GAUDI_EVENT_MME2_SBAB_SERR: 7172 case GAUDI_EVENT_MME3_SBAB_SERR: 7173 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; 7174 params.block_address = 7175 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7176 params.num_memories = 33; 7177 params.derr = false; 7178 extract_info_from_fw = false; 7179 break; 7180 case GAUDI_EVENT_MME0_SBAB_DERR: 7181 case GAUDI_EVENT_MME1_SBAB_DERR: 7182 case GAUDI_EVENT_MME2_SBAB_DERR: 7183 case GAUDI_EVENT_MME3_SBAB_DERR: 7184 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; 7185 params.block_address = 7186 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7187 params.num_memories = 33; 7188 params.derr = true; 7189 extract_info_from_fw = false; 7190 break; 7191 default: 7192 return; 7193 } 7194 7195 extract_ecc_info: 7196 if (extract_info_from_fw) { 7197 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7198 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7199 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7200 } else { 7201 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address, 7202 &ecc_syndrom, &memory_wrapper_idx); 7203 if (rc) 7204 return; 7205 } 7206 7207 dev_err(hdev->dev, 7208 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n", 7209 ecc_address, ecc_syndrom, memory_wrapper_idx); 7210 } 7211 7212 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7213 { 7214 u64 qman_base; 7215 char desc[32]; 7216 u32 qid_base; 7217 u8 index; 7218 7219 switch (event_type) { 7220 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7221 index = event_type - GAUDI_EVENT_TPC0_QM; 7222 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7223 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; 7224 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); 7225 break; 7226 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7227 if (event_type == GAUDI_EVENT_MME0_QM) { 7228 index = 0; 7229 qid_base = GAUDI_QUEUE_ID_MME_0_0; 7230 } else { /* event_type == GAUDI_EVENT_MME2_QM */ 7231 index = 2; 7232 qid_base = GAUDI_QUEUE_ID_MME_1_0; 7233 } 7234 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET; 7235 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); 7236 break; 7237 case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: 7238 index = event_type - GAUDI_EVENT_DMA0_QM; 7239 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS; 7240 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */ 7241 if (index > 1) 7242 qid_base++; 7243 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET; 7244 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); 7245 break; 7246 case GAUDI_EVENT_NIC0_QM0: 7247 qid_base = GAUDI_QUEUE_ID_NIC_0_0; 7248 qman_base = mmNIC0_QM0_BASE; 7249 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0"); 7250 break; 7251 case GAUDI_EVENT_NIC0_QM1: 7252 qid_base = GAUDI_QUEUE_ID_NIC_1_0; 7253 qman_base = mmNIC0_QM1_BASE; 7254 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1"); 7255 break; 7256 case GAUDI_EVENT_NIC1_QM0: 7257 qid_base = GAUDI_QUEUE_ID_NIC_2_0; 7258 qman_base = mmNIC1_QM0_BASE; 7259 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0"); 7260 break; 7261 case GAUDI_EVENT_NIC1_QM1: 7262 qid_base = GAUDI_QUEUE_ID_NIC_3_0; 7263 qman_base = mmNIC1_QM1_BASE; 7264 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1"); 7265 break; 7266 case GAUDI_EVENT_NIC2_QM0: 7267 qid_base = GAUDI_QUEUE_ID_NIC_4_0; 7268 qman_base = mmNIC2_QM0_BASE; 7269 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0"); 7270 break; 7271 case GAUDI_EVENT_NIC2_QM1: 7272 qid_base = GAUDI_QUEUE_ID_NIC_5_0; 7273 qman_base = mmNIC2_QM1_BASE; 7274 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1"); 7275 break; 7276 case GAUDI_EVENT_NIC3_QM0: 7277 qid_base = GAUDI_QUEUE_ID_NIC_6_0; 7278 qman_base = mmNIC3_QM0_BASE; 7279 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0"); 7280 break; 7281 case GAUDI_EVENT_NIC3_QM1: 7282 qid_base = GAUDI_QUEUE_ID_NIC_7_0; 7283 qman_base = mmNIC3_QM1_BASE; 7284 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1"); 7285 break; 7286 case GAUDI_EVENT_NIC4_QM0: 7287 qid_base = GAUDI_QUEUE_ID_NIC_8_0; 7288 qman_base = mmNIC4_QM0_BASE; 7289 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0"); 7290 break; 7291 case GAUDI_EVENT_NIC4_QM1: 7292 qid_base = GAUDI_QUEUE_ID_NIC_9_0; 7293 qman_base = mmNIC4_QM1_BASE; 7294 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1"); 7295 break; 7296 default: 7297 return; 7298 } 7299 7300 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask); 7301 } 7302 7303 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, 7304 bool razwi, u64 *event_mask) 7305 { 7306 bool is_read = false, is_write = false; 7307 u16 engine_id[2], num_of_razwi_eng = 0; 7308 char desc[64] = ""; 7309 u64 razwi_addr = 0; 7310 u8 razwi_flags = 0; 7311 7312 /* 7313 * Init engine id by default as not valid and only if razwi initiated from engine with 7314 * engine id it will get valid value. 
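* Entries that remain HL_RAZWI_NA_ENG_ID below mean no initiating engine was resolved and they are not counted in num_of_razwi_eng.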
7315 */ 7316 engine_id[0] = HL_RAZWI_NA_ENG_ID; 7317 engine_id[1] = HL_RAZWI_NA_ENG_ID; 7318 7319 gaudi_get_event_desc(event_type, desc, sizeof(desc)); 7320 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7321 event_type, desc); 7322 7323 if (razwi) { 7324 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, 7325 &is_write); 7326 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); 7327 7328 if (is_read) 7329 razwi_flags |= HL_RAZWI_READ; 7330 if (is_write) 7331 razwi_flags |= HL_RAZWI_WRITE; 7332 7333 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) { 7334 if (engine_id[1] != HL_RAZWI_NA_ENG_ID) 7335 num_of_razwi_eng = 2; 7336 else 7337 num_of_razwi_eng = 1; 7338 } 7339 7340 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags, 7341 event_mask); 7342 } 7343 } 7344 7345 static void gaudi_print_out_of_sync_info(struct hl_device *hdev, 7346 struct cpucp_pkt_sync_err *sync_err) 7347 { 7348 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 7349 7350 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 7351 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 7352 } 7353 7354 static void gaudi_print_fw_alive_info(struct hl_device *hdev, 7355 struct hl_eq_fw_alive *fw_alive) 7356 { 7357 dev_err(hdev->dev, 7358 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", 7359 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical", 7360 le32_to_cpu(fw_alive->process_id), 7361 le32_to_cpu(fw_alive->thread_id), 7362 le64_to_cpu(fw_alive->uptime_seconds)); 7363 } 7364 7365 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, 7366 void *data) 7367 { 7368 char desc[64] = "", *type; 7369 struct eq_nic_sei_event *eq_nic_sei = data; 7370 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; 7371 7372 switch (eq_nic_sei->axi_error_cause) { 7373 case RXB: 7374 type = "RXB"; 7375 break; 7376 case RXE: 7377 type = "RXE"; 7378 break; 7379 case TXS: 7380 type = "TXS"; 7381 break; 7382 case TXE: 7383 type = "TXE"; 7384 break; 7385 case QPC_RESP: 7386 type = "QPC_RESP"; 7387 break; 7388 case NON_AXI_ERR: 7389 type = "NON_AXI_ERR"; 7390 break; 7391 case TMR: 7392 type = "TMR"; 7393 break; 7394 default: 7395 dev_err(hdev->dev, "unknown NIC AXI cause %d\n", 7396 eq_nic_sei->axi_error_cause); 7397 type = "N/A"; 7398 break; 7399 } 7400 7401 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, 7402 eq_nic_sei->id); 7403 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7404 event_type, desc); 7405 } 7406 7407 static int gaudi_compute_reset_late_init(struct hl_device *hdev) 7408 { 7409 /* GAUDI doesn't support any reset except hard-reset */ 7410 return -EPERM; 7411 } 7412 7413 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, 7414 struct hl_eq_hbm_ecc_data *hbm_ecc_data) 7415 { 7416 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7417 int rc = 0; 7418 7419 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 7420 CPU_BOOT_DEV_STS0_HBM_ECC_EN) { 7421 if (!hbm_ecc_data) { 7422 dev_err(hdev->dev, "No FW ECC data"); 7423 return 0; 7424 } 7425 7426 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK, 7427 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7428 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK, 7429 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7430 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK, 7431 
le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7432 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK, 7433 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7434 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK, 7435 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7436 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK, 7437 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7438 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK, 7439 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7440 7441 dev_err(hdev->dev, 7442 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7443 device, ch, wr_par, rd_par, ca_par, serr, derr); 7444 dev_err(hdev->dev, 7445 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7446 device, ch, hbm_ecc_data->first_addr, type, 7447 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7448 hbm_ecc_data->dec_cnt); 7449 return 0; 7450 } 7451 7452 if (hdev->asic_prop.fw_security_enabled) { 7453 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7454 return 0; 7455 } 7456 7457 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; 7458 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { 7459 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); 7460 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7461 if (val) { 7462 rc = -EIO; 7463 dev_err(hdev->dev, 7464 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7465 device, ch * 2, val & 0x1, (val >> 1) & 0x1, 7466 (val >> 2) & 0x1, (val >> 3) & 0x1, 7467 (val >> 4) & 0x1); 7468 7469 val2 = RREG32(base + ch * 0x1000 + 0x060); 7470 dev_err(hdev->dev, 7471 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7472 device, ch * 2, 7473 RREG32(base + ch * 0x1000 + 0x064), 7474 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7475 (val2 & 0xFF0000) >> 16, 7476 (val2 & 0xFF000000) >> 24); 7477 } 7478 7479 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); 7480 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7481 if (val) { 7482 rc = -EIO; 7483 dev_err(hdev->dev, 7484 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7485 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, 7486 (val >> 2) & 0x1, (val >> 3) & 0x1, 7487 (val >> 4) & 0x1); 7488 7489 val2 = RREG32(base + ch * 0x1000 + 0x070); 7490 dev_err(hdev->dev, 7491 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7492 device, ch * 2 + 1, 7493 RREG32(base + ch * 0x1000 + 0x074), 7494 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7495 (val2 & 0xFF0000) >> 16, 7496 (val2 & 0xFF000000) >> 24); 7497 } 7498 7499 /* Clear interrupts */ 7500 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); 7501 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); 7502 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); 7503 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); 7504 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); 7505 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); 7506 } 7507 7508 val = RREG32(base + 0x8F30); 7509 val2 = RREG32(base + 0x8F34); 7510 if (val | val2) { 7511 rc = -EIO; 7512 dev_err(hdev->dev, 7513 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", 7514 device, val, val2); 7515 } 7516 val = RREG32(base + 0x8F40); 7517 val2 = RREG32(base + 0x8F44); 7518 if (val | val2) { 7519 rc = -EIO; 7520 dev_err(hdev->dev, 7521 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", 7522 device, val, val2); 7523 } 7524 7525 return rc; 7526 } 
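/* Map an HBM SPI_0/SPI_1 event type to the index (0-3) of the HBM device that raised it; the event handler uses this index to pick the device whose interrupts gaudi_hbm_read_interrupts() decodes. */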
7527 7528 static int gaudi_hbm_event_to_dev(u16 hbm_event_type) 7529 { 7530 switch (hbm_event_type) { 7531 case GAUDI_EVENT_HBM0_SPI_0: 7532 case GAUDI_EVENT_HBM0_SPI_1: 7533 return 0; 7534 case GAUDI_EVENT_HBM1_SPI_0: 7535 case GAUDI_EVENT_HBM1_SPI_1: 7536 return 1; 7537 case GAUDI_EVENT_HBM2_SPI_0: 7538 case GAUDI_EVENT_HBM2_SPI_1: 7539 return 2; 7540 case GAUDI_EVENT_HBM3_SPI_0: 7541 case GAUDI_EVENT_HBM3_SPI_1: 7542 return 3; 7543 default: 7544 break; 7545 } 7546 7547 /* Should never happen */ 7548 return 0; 7549 } 7550 7551 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, 7552 char *interrupt_name) 7553 { 7554 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; 7555 bool soft_reset_required = false; 7556 7557 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & 7558 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; 7559 7560 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++) 7561 if (tpc_interrupts_cause & BIT(i)) { 7562 dev_err_ratelimited(hdev->dev, 7563 "TPC%d_%s interrupt cause: %s\n", 7564 tpc_id, interrupt_name, 7565 gaudi_tpc_interrupts_cause[i]); 7566 /* If this is a QM error, we need to soft-reset */ 7567 if (i == 15) 7568 soft_reset_required = true; 7569 } 7570 7571 /* Clear interrupts */ 7572 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); 7573 7574 return soft_reset_required; 7575 } 7576 7577 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type) 7578 { 7579 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1; 7580 } 7581 7582 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type) 7583 { 7584 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6; 7585 } 7586 7587 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7588 { 7589 ktime_t zero_time = ktime_set(0, 0); 7590 7591 mutex_lock(&hdev->clk_throttling.lock); 7592 7593 switch (event_type) { 7594 case GAUDI_EVENT_FIX_POWER_ENV_S: 7595 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 7596 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 7597 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 7598 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 7599 dev_info_ratelimited(hdev->dev, 7600 "Clock throttling due to power consumption\n"); 7601 break; 7602 7603 case GAUDI_EVENT_FIX_POWER_ENV_E: 7604 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 7605 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 7606 dev_info_ratelimited(hdev->dev, 7607 "Power envelope is safe, back to optimal clock\n"); 7608 break; 7609 7610 case GAUDI_EVENT_FIX_THERMAL_ENV_S: 7611 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 7612 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 7613 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 7614 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 7615 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7616 dev_info_ratelimited(hdev->dev, 7617 "Clock throttling due to overheating\n"); 7618 break; 7619 7620 case GAUDI_EVENT_FIX_THERMAL_ENV_E: 7621 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 7622 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 7623 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7624 dev_info_ratelimited(hdev->dev, 7625 "Thermal envelope is safe, back to optimal clock\n"); 7626 break; 7627 7628 default: 7629
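/* gaudi_handle_eqe() only routes events in the FIX_POWER_ENV_S..FIX_THERMAL_ENV_E range here, so any other value is unexpected */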
dev_err(hdev->dev, "Received invalid clock change event %d\n", 7630 event_type); 7631 break; 7632 } 7633 7634 mutex_unlock(&hdev->clk_throttling.lock); 7635 } 7636 7637 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 7638 { 7639 struct gaudi_device *gaudi = hdev->asic_specific; 7640 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0; 7641 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); 7642 u32 fw_fatal_err_flag = 0, flags = 0; 7643 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) 7644 >> EQ_CTL_EVENT_TYPE_SHIFT); 7645 bool reset_required, reset_direct = false; 7646 u8 cause; 7647 int rc; 7648 7649 if (event_type >= GAUDI_EVENT_SIZE) { 7650 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 7651 event_type, GAUDI_EVENT_SIZE - 1); 7652 return; 7653 } 7654 7655 gaudi->events_stat[event_type]++; 7656 gaudi->events_stat_aggregate[event_type]++; 7657 7658 switch (event_type) { 7659 case GAUDI_EVENT_PCIE_CORE_DERR: 7660 case GAUDI_EVENT_PCIE_IF_DERR: 7661 case GAUDI_EVENT_PCIE_PHY_DERR: 7662 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: 7663 case GAUDI_EVENT_MME0_ACC_DERR: 7664 case GAUDI_EVENT_MME0_SBAB_DERR: 7665 case GAUDI_EVENT_MME1_ACC_DERR: 7666 case GAUDI_EVENT_MME1_SBAB_DERR: 7667 case GAUDI_EVENT_MME2_ACC_DERR: 7668 case GAUDI_EVENT_MME2_SBAB_DERR: 7669 case GAUDI_EVENT_MME3_ACC_DERR: 7670 case GAUDI_EVENT_MME3_SBAB_DERR: 7671 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: 7672 fallthrough; 7673 case GAUDI_EVENT_CPU_IF_ECC_DERR: 7674 case GAUDI_EVENT_PSOC_MEM_DERR: 7675 case GAUDI_EVENT_PSOC_CORESIGHT_DERR: 7676 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: 7677 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: 7678 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: 7679 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: 7680 case GAUDI_EVENT_MMU_DERR: 7681 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7682 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7683 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7684 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7685 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7686 goto reset_device; 7687 7688 case GAUDI_EVENT_GIC500: 7689 case GAUDI_EVENT_AXI_ECC: 7690 case GAUDI_EVENT_L2_RAM_ECC: 7691 case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: 7692 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7693 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7694 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7695 goto reset_device; 7696 7697 case GAUDI_EVENT_HBM0_SPI_0: 7698 case GAUDI_EVENT_HBM1_SPI_0: 7699 case GAUDI_EVENT_HBM2_SPI_0: 7700 case GAUDI_EVENT_HBM3_SPI_0: 7701 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7702 gaudi_hbm_read_interrupts(hdev, 7703 gaudi_hbm_event_to_dev(event_type), 7704 &eq_entry->hbm_ecc_data); 7705 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7706 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7707 goto reset_device; 7708 7709 case GAUDI_EVENT_HBM0_SPI_1: 7710 case GAUDI_EVENT_HBM1_SPI_1: 7711 case GAUDI_EVENT_HBM2_SPI_1: 7712 case GAUDI_EVENT_HBM3_SPI_1: 7713 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7714 gaudi_hbm_read_interrupts(hdev, 7715 gaudi_hbm_event_to_dev(event_type), 7716 &eq_entry->hbm_ecc_data); 7717 hl_fw_unmask_irq(hdev, event_type); 7718 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7719 break; 7720 7721 case GAUDI_EVENT_TPC0_DEC: 7722 case GAUDI_EVENT_TPC1_DEC: 7723 case GAUDI_EVENT_TPC2_DEC: 7724 case GAUDI_EVENT_TPC3_DEC: 7725 case GAUDI_EVENT_TPC4_DEC: 7726 case GAUDI_EVENT_TPC5_DEC: 7727 case GAUDI_EVENT_TPC6_DEC: 7728 case GAUDI_EVENT_TPC7_DEC: 7729 /* In TPC DEC event, notify on TPC assertion. While there isn't 7730 * a specific event for assertion yet, the FW generates TPC DEC event. 7731 * The SW upper layer will inspect an internal mapped area to indicate 7732 * if the event is a TPC Assertion or a "real" TPC DEC. 7733 */ 7734 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7735 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7736 reset_required = gaudi_tpc_read_interrupts(hdev, 7737 tpc_dec_event_to_tpc_id(event_type), 7738 "AXI_SLV_DEC_Error"); 7739 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7740 if (reset_required) { 7741 dev_err(hdev->dev, "reset required due to %s\n", 7742 gaudi_irq_map_table[event_type].name); 7743 7744 reset_direct = true; 7745 goto reset_device; 7746 } else { 7747 hl_fw_unmask_irq(hdev, event_type); 7748 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7749 } 7750 break; 7751 7752 case GAUDI_EVENT_TPC0_KRN_ERR: 7753 case GAUDI_EVENT_TPC1_KRN_ERR: 7754 case GAUDI_EVENT_TPC2_KRN_ERR: 7755 case GAUDI_EVENT_TPC3_KRN_ERR: 7756 case GAUDI_EVENT_TPC4_KRN_ERR: 7757 case GAUDI_EVENT_TPC5_KRN_ERR: 7758 case GAUDI_EVENT_TPC6_KRN_ERR: 7759 case GAUDI_EVENT_TPC7_KRN_ERR: 7760 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7761 reset_required = gaudi_tpc_read_interrupts(hdev, 7762 tpc_krn_event_to_tpc_id(event_type), 7763 "KRN_ERR"); 7764 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7765 if (reset_required) { 7766 dev_err(hdev->dev, "reset required due to %s\n", 7767 gaudi_irq_map_table[event_type].name); 7768 7769 reset_direct = true; 7770 goto reset_device; 7771 } else { 7772 hl_fw_unmask_irq(hdev, event_type); 7773 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7774 } 7775 break; 7776 7777 case GAUDI_EVENT_PCIE_CORE_SERR: 7778 case GAUDI_EVENT_PCIE_IF_SERR: 7779 case GAUDI_EVENT_PCIE_PHY_SERR: 7780 case GAUDI_EVENT_TPC0_SERR ... 
GAUDI_EVENT_TPC7_SERR: 7781 case GAUDI_EVENT_MME0_ACC_SERR: 7782 case GAUDI_EVENT_MME0_SBAB_SERR: 7783 case GAUDI_EVENT_MME1_ACC_SERR: 7784 case GAUDI_EVENT_MME1_SBAB_SERR: 7785 case GAUDI_EVENT_MME2_ACC_SERR: 7786 case GAUDI_EVENT_MME2_SBAB_SERR: 7787 case GAUDI_EVENT_MME3_ACC_SERR: 7788 case GAUDI_EVENT_MME3_SBAB_SERR: 7789 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: 7790 case GAUDI_EVENT_CPU_IF_ECC_SERR: 7791 case GAUDI_EVENT_PSOC_MEM_SERR: 7792 case GAUDI_EVENT_PSOC_CORESIGHT_SERR: 7793 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: 7794 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: 7795 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: 7796 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: 7797 fallthrough; 7798 case GAUDI_EVENT_MMU_SERR: 7799 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7800 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7801 hl_fw_unmask_irq(hdev, event_type); 7802 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7803 break; 7804 7805 case GAUDI_EVENT_PCIE_DEC: 7806 case GAUDI_EVENT_CPU_AXI_SPLITTER: 7807 case GAUDI_EVENT_PSOC_AXI_DEC: 7808 case GAUDI_EVENT_PSOC_PRSTN_FALL: 7809 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7810 hl_fw_unmask_irq(hdev, event_type); 7811 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7812 break; 7813 7814 case GAUDI_EVENT_MMU_PAGE_FAULT: 7815 case GAUDI_EVENT_MMU_WR_PERM: 7816 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7817 hl_fw_unmask_irq(hdev, event_type); 7818 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7819 break; 7820 7821 case GAUDI_EVENT_MME0_WBC_RSP: 7822 case GAUDI_EVENT_MME0_SBAB0_RSP: 7823 case GAUDI_EVENT_MME1_WBC_RSP: 7824 case GAUDI_EVENT_MME1_SBAB0_RSP: 7825 case GAUDI_EVENT_MME2_WBC_RSP: 7826 case GAUDI_EVENT_MME2_SBAB0_RSP: 7827 case GAUDI_EVENT_MME3_WBC_RSP: 7828 case GAUDI_EVENT_MME3_SBAB0_RSP: 7829 case GAUDI_EVENT_RAZWI_OR_ADC: 7830 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7831 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: 7832 fallthrough; 7833 case GAUDI_EVENT_NIC0_QM0: 7834 case GAUDI_EVENT_NIC0_QM1: 7835 case GAUDI_EVENT_NIC1_QM0: 7836 case GAUDI_EVENT_NIC1_QM1: 7837 case GAUDI_EVENT_NIC2_QM0: 7838 case GAUDI_EVENT_NIC2_QM1: 7839 case GAUDI_EVENT_NIC3_QM0: 7840 case GAUDI_EVENT_NIC3_QM1: 7841 case GAUDI_EVENT_NIC4_QM0: 7842 case GAUDI_EVENT_NIC4_QM1: 7843 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: 7844 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7845 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7846 gaudi_handle_qman_err(hdev, event_type, &event_mask); 7847 hl_fw_unmask_irq(hdev, event_type); 7848 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); 7849 break; 7850 7851 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7852 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7853 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7854 goto reset_device; 7855 7856 case GAUDI_EVENT_TPC0_BMON_SPMU: 7857 case GAUDI_EVENT_TPC1_BMON_SPMU: 7858 case GAUDI_EVENT_TPC2_BMON_SPMU: 7859 case GAUDI_EVENT_TPC3_BMON_SPMU: 7860 case GAUDI_EVENT_TPC4_BMON_SPMU: 7861 case GAUDI_EVENT_TPC5_BMON_SPMU: 7862 case GAUDI_EVENT_TPC6_BMON_SPMU: 7863 case GAUDI_EVENT_TPC7_BMON_SPMU: 7864 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7865 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7866 hl_fw_unmask_irq(hdev, event_type); 7867 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7868 break; 7869 7870 case GAUDI_EVENT_NIC_SEI_0 ... 
GAUDI_EVENT_NIC_SEI_4: 7871 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7872 hl_fw_unmask_irq(hdev, event_type); 7873 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7874 break; 7875 7876 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: 7877 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7878 gaudi_print_sm_sei_info(hdev, event_type, 7879 &eq_entry->sm_sei_data); 7880 rc = hl_state_dump(hdev); 7881 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7882 if (rc) 7883 dev_err(hdev->dev, 7884 "Error during system state dump %d\n", rc); 7885 hl_fw_unmask_irq(hdev, event_type); 7886 break; 7887 7888 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: 7889 break; 7890 7891 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: 7892 gaudi_print_clk_change_info(hdev, event_type, &event_mask); 7893 hl_fw_unmask_irq(hdev, event_type); 7894 break; 7895 7896 case GAUDI_EVENT_PSOC_GPIO_U16_0: 7897 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; 7898 dev_err(hdev->dev, 7899 "Received high temp H/W interrupt %d (cause %d)\n", 7900 event_type, cause); 7901 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7902 break; 7903 7904 case GAUDI_EVENT_DEV_RESET_REQ: 7905 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7906 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7907 goto reset_device; 7908 7909 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7910 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7911 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7912 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7913 goto reset_device; 7914 7915 case GAUDI_EVENT_FW_ALIVE_S: 7916 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7917 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7918 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7919 goto reset_device; 7920 7921 default: 7922 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", 7923 event_type); 7924 break; 7925 } 7926 7927 if (event_mask) 7928 hl_notifier_event_send_all(hdev, event_mask); 7929 7930 return; 7931 7932 reset_device: 7933 reset_required = true; 7934 7935 if (hdev->asic_prop.fw_security_enabled && !reset_direct) { 7936 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag; 7937 7938 /* notify on device unavailable while the reset triggered by fw */ 7939 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | 7940 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); 7941 } else if (hdev->hard_reset_on_fw_events) { 7942 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag; 7943 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7944 } else { 7945 reset_required = false; 7946 } 7947 7948 if (reset_required) { 7949 hl_device_cond_reset(hdev, flags, event_mask); 7950 } else { 7951 hl_fw_unmask_irq(hdev, event_type); 7952 /* Notification on occurred event needs to be sent although reset is not executed */ 7953 if (event_mask) 7954 hl_notifier_event_send_all(hdev, event_mask); 7955 } 7956 } 7957 7958 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7959 { 7960 struct gaudi_device *gaudi = hdev->asic_specific; 7961 7962 if (aggregate) { 7963 *size = (u32) sizeof(gaudi->events_stat_aggregate); 7964 return gaudi->events_stat_aggregate; 7965 } 7966 7967 *size = (u32) sizeof(gaudi->events_stat); 7968 return gaudi->events_stat; 7969 } 7970 7971 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 7972 { 7973 struct gaudi_device *gaudi = hdev->asic_specific; 7974 u32 status, 
timeout_usec; 7975 int rc; 7976 7977 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || 7978 hdev->reset_info.hard_reset_pending) 7979 return 0; 7980 7981 if (hdev->pldm) 7982 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7983 else 7984 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7985 7986 /* L0 & L1 invalidation */ 7987 WREG32(mmSTLB_INV_PS, 3); 7988 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); 7989 WREG32(mmSTLB_INV_PS, 2); 7990 7991 rc = hl_poll_timeout( 7992 hdev, 7993 mmSTLB_INV_PS, 7994 status, 7995 !status, 7996 1000, 7997 timeout_usec); 7998 7999 WREG32(mmSTLB_INV_SET, 0); 8000 8001 return rc; 8002 } 8003 8004 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, 8005 bool is_hard, u32 flags, 8006 u32 asid, u64 va, u64 size) 8007 { 8008 /* Treat as invalidate all because there is no range invalidation 8009 * in Gaudi 8010 */ 8011 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 8012 } 8013 8014 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr) 8015 { 8016 u32 status, timeout_usec; 8017 int rc; 8018 8019 if (hdev->pldm) 8020 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 8021 else 8022 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 8023 8024 WREG32(MMU_ASID, asid); 8025 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 8026 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); 8027 WREG32(MMU_BUSY, 0x80000000); 8028 8029 rc = hl_poll_timeout( 8030 hdev, 8031 MMU_BUSY, 8032 status, 8033 !(status & 0x80000000), 8034 1000, 8035 timeout_usec); 8036 8037 if (rc) { 8038 dev_err(hdev->dev, 8039 "Timeout during MMU hop0 config of asid %d\n", asid); 8040 return rc; 8041 } 8042 8043 return 0; 8044 } 8045 8046 static int gaudi_send_heartbeat(struct hl_device *hdev) 8047 { 8048 struct gaudi_device *gaudi = hdev->asic_specific; 8049 8050 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8051 return 0; 8052 8053 return hl_fw_send_heartbeat(hdev); 8054 } 8055 8056 static int gaudi_cpucp_info_get(struct hl_device *hdev) 8057 { 8058 struct gaudi_device *gaudi = hdev->asic_specific; 8059 struct asic_fixed_properties *prop = &hdev->asic_prop; 8060 int rc; 8061 8062 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8063 return 0; 8064 8065 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, 8066 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 8067 mmCPU_BOOT_ERR1); 8068 if (rc) 8069 return rc; 8070 8071 if (!strlen(prop->cpucp_info.card_name)) 8072 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 8073 CARD_NAME_MAX_LEN); 8074 8075 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); 8076 8077 set_default_power_values(hdev); 8078 8079 return 0; 8080 } 8081 8082 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 8083 struct engines_data *e) 8084 { 8085 struct gaudi_device *gaudi = hdev->asic_specific; 8086 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; 8087 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; 8088 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n"; 8089 unsigned long *mask = (unsigned long *)mask_arr; 8090 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; 8091 bool is_idle = true, is_eng_idle, is_slave; 8092 u64 offset; 8093 int i, dma_id, port; 8094 8095 if (e) 8096 hl_engine_data_sprintf(e, 8097 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8098 "--- ------- ------------ ---------- -------------\n"); 8099 8100 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { 8101 dma_id = gaudi_dma_assignment[i]; 8102 offset = dma_id * 
DMA_QMAN_OFFSET; 8103 8104 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); 8105 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); 8106 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); 8107 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8108 IS_DMA_IDLE(dma_core_sts0); 8109 is_idle &= is_eng_idle; 8110 8111 if (mask && !is_eng_idle) 8112 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8113 if (e) 8114 hl_engine_data_sprintf(e, fmt, dma_id, 8115 is_eng_idle ? "Y" : "N", qm_glbl_sts0, 8116 qm_cgm_sts, dma_core_sts0); 8117 } 8118 8119 if (e) 8120 hl_engine_data_sprintf(e, 8121 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8122 "--- ------- ------------ ---------- ----------\n"); 8123 8124 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 8125 offset = i * TPC_QMAN_OFFSET; 8126 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); 8127 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); 8128 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); 8129 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8130 IS_TPC_IDLE(tpc_cfg_sts); 8131 is_idle &= is_eng_idle; 8132 8133 if (mask && !is_eng_idle) 8134 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8135 if (e) 8136 hl_engine_data_sprintf(e, fmt, i, 8137 is_eng_idle ? "Y" : "N", 8138 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8139 } 8140 8141 if (e) 8142 hl_engine_data_sprintf(e, 8143 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8144 "--- ------- ------------ ---------- -----------\n"); 8145 8146 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { 8147 offset = i * MME_QMAN_OFFSET; 8148 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); 8149 is_eng_idle = IS_MME_IDLE(mme_arch_sts); 8150 8151 /* MME 1 & 3 are slaves, no need to check their QMANs */ 8152 is_slave = i % 2; 8153 if (!is_slave) { 8154 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); 8155 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); 8156 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8157 } 8158 8159 is_idle &= is_eng_idle; 8160 8161 if (mask && !is_eng_idle) 8162 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8163 if (e) { 8164 if (!is_slave) 8165 hl_engine_data_sprintf(e, fmt, i, 8166 is_eng_idle ? "Y" : "N", 8167 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8168 else 8169 hl_engine_data_sprintf(e, mme_slave_fmt, i, 8170 is_eng_idle ? "Y" : "N", "-", 8171 "-", mme_arch_sts); 8172 } 8173 } 8174 8175 if (e) 8176 hl_engine_data_sprintf(e, 8177 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8178 "--- ------- ------------ ----------\n"); 8179 8180 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { 8181 offset = i * NIC_MACRO_QMAN_OFFSET; 8182 port = 2 * i; 8183 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8184 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 8185 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 8186 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8187 is_idle &= is_eng_idle; 8188 8189 if (mask && !is_eng_idle) 8190 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8191 if (e) 8192 hl_engine_data_sprintf(e, nic_fmt, port, 8193 is_eng_idle ? 
"Y" : "N", 8194 qm_glbl_sts0, qm_cgm_sts); 8195 } 8196 8197 port = 2 * i + 1; 8198 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8199 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset); 8200 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset); 8201 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8202 is_idle &= is_eng_idle; 8203 8204 if (mask && !is_eng_idle) 8205 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8206 if (e) 8207 hl_engine_data_sprintf(e, nic_fmt, port, 8208 is_eng_idle ? "Y" : "N", 8209 qm_glbl_sts0, qm_cgm_sts); 8210 } 8211 } 8212 8213 if (e) 8214 hl_engine_data_sprintf(e, "\n"); 8215 8216 return is_idle; 8217 } 8218 8219 static void gaudi_hw_queues_lock(struct hl_device *hdev) 8220 __acquires(&gaudi->hw_queues_lock) 8221 { 8222 struct gaudi_device *gaudi = hdev->asic_specific; 8223 8224 spin_lock(&gaudi->hw_queues_lock); 8225 } 8226 8227 static void gaudi_hw_queues_unlock(struct hl_device *hdev) 8228 __releases(&gaudi->hw_queues_lock) 8229 { 8230 struct gaudi_device *gaudi = hdev->asic_specific; 8231 8232 spin_unlock(&gaudi->hw_queues_lock); 8233 } 8234 8235 static u32 gaudi_get_pci_id(struct hl_device *hdev) 8236 { 8237 return hdev->pdev->device; 8238 } 8239 8240 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, 8241 size_t max_size) 8242 { 8243 struct gaudi_device *gaudi = hdev->asic_specific; 8244 8245 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8246 return 0; 8247 8248 return hl_fw_get_eeprom_data(hdev, data, max_size); 8249 } 8250 8251 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) 8252 { 8253 struct gaudi_device *gaudi = hdev->asic_specific; 8254 8255 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8256 return 0; 8257 8258 return hl_fw_get_monitor_dump(hdev, data); 8259 } 8260 8261 /* 8262 * this function should be used only during initialization and/or after reset, 8263 * when there are no active users. 
8264 */ 8265 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) 8266 { 8267 u64 kernel_timeout; 8268 u32 status, offset; 8269 int rc; 8270 8271 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); 8272 8273 if (hdev->pldm) 8274 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; 8275 else 8276 kernel_timeout = HL_DEVICE_TIMEOUT_USEC; 8277 8278 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, 8279 lower_32_bits(tpc_kernel)); 8280 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, 8281 upper_32_bits(tpc_kernel)); 8282 8283 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, 8284 lower_32_bits(tpc_kernel)); 8285 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, 8286 upper_32_bits(tpc_kernel)); 8287 /* set a valid LUT pointer, content is of no significance */ 8288 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, 8289 lower_32_bits(tpc_kernel)); 8290 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, 8291 upper_32_bits(tpc_kernel)); 8292 8293 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, 8294 lower_32_bits(CFG_BASE + 8295 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); 8296 8297 WREG32(mmTPC0_CFG_TPC_CMD + offset, 8298 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | 8299 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); 8300 /* wait a bit for the engine to start executing */ 8301 usleep_range(1000, 1500); 8302 8303 /* wait until engine has finished executing */ 8304 rc = hl_poll_timeout( 8305 hdev, 8306 mmTPC0_CFG_STATUS + offset, 8307 status, 8308 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8309 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8310 1000, 8311 kernel_timeout); 8312 8313 if (rc) { 8314 dev_err(hdev->dev, 8315 "Timeout while waiting for TPC%d icache prefetch\n", 8316 tpc_id); 8317 return -EIO; 8318 } 8319 8320 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, 8321 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); 8322 8323 /* wait a bit for the engine to start executing */ 8324 usleep_range(1000, 1500); 8325 8326 /* wait until engine has finished executing */ 8327 rc = hl_poll_timeout( 8328 hdev, 8329 mmTPC0_CFG_STATUS + offset, 8330 status, 8331 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8332 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8333 1000, 8334 kernel_timeout); 8335 8336 if (rc) { 8337 dev_err(hdev->dev, 8338 "Timeout while waiting for TPC%d vector pipe\n", 8339 tpc_id); 8340 return -EIO; 8341 } 8342 8343 rc = hl_poll_timeout( 8344 hdev, 8345 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, 8346 status, 8347 (status == 0), 8348 1000, 8349 kernel_timeout); 8350 8351 if (rc) { 8352 dev_err(hdev->dev, 8353 "Timeout while waiting for TPC%d kernel to execute\n", 8354 tpc_id); 8355 return -EIO; 8356 } 8357 8358 return 0; 8359 } 8360 8361 static int gaudi_internal_cb_pool_init(struct hl_device *hdev, 8362 struct hl_ctx *ctx) 8363 { 8364 struct gaudi_device *gaudi = hdev->asic_specific; 8365 int min_alloc_order, rc, collective_cb_size; 8366 8367 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8368 return 0; 8369 8370 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 8371 HOST_SPACE_INTERNAL_CB_SZ, 8372 &hdev->internal_cb_pool_dma_addr, 8373 GFP_KERNEL | __GFP_ZERO); 8374 8375 if (!hdev->internal_cb_pool_virt_addr) 8376 return -ENOMEM; 8377 8378 collective_cb_size = sizeof(struct packet_msg_short) * 5 + 8379 sizeof(struct packet_fence); 8380 min_alloc_order = ilog2(collective_cb_size); 8381 8382 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 8383 if (!hdev->internal_cb_pool) { 8384 dev_err(hdev->dev, 8385 "Failed to 
create internal CB pool\n"); 8386 rc = -ENOMEM; 8387 goto free_internal_cb_pool; 8388 } 8389 8390 rc = gen_pool_add(hdev->internal_cb_pool, 8391 (uintptr_t) hdev->internal_cb_pool_virt_addr, 8392 HOST_SPACE_INTERNAL_CB_SZ, -1); 8393 if (rc) { 8394 dev_err(hdev->dev, 8395 "Failed to add memory to internal CB pool\n"); 8396 rc = -EFAULT; 8397 goto destroy_internal_cb_pool; 8398 } 8399 8400 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, 8401 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ, 8402 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 8403 8404 if (!hdev->internal_cb_va_base) { 8405 rc = -ENOMEM; 8406 goto destroy_internal_cb_pool; 8407 } 8408 8409 mutex_lock(&hdev->mmu_lock); 8410 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8411 hdev->internal_cb_pool_dma_addr, 8412 HOST_SPACE_INTERNAL_CB_SZ); 8413 8414 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8415 mutex_unlock(&hdev->mmu_lock); 8416 8417 if (rc) 8418 goto unreserve_internal_cb_pool; 8419 8420 return 0; 8421 8422 unreserve_internal_cb_pool: 8423 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8424 HOST_SPACE_INTERNAL_CB_SZ); 8425 destroy_internal_cb_pool: 8426 gen_pool_destroy(hdev->internal_cb_pool); 8427 free_internal_cb_pool: 8428 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8429 hdev->internal_cb_pool_dma_addr); 8430 8431 return rc; 8432 } 8433 8434 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, 8435 struct hl_ctx *ctx) 8436 { 8437 struct gaudi_device *gaudi = hdev->asic_specific; 8438 8439 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8440 return; 8441 8442 mutex_lock(&hdev->mmu_lock); 8443 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8444 HOST_SPACE_INTERNAL_CB_SZ); 8445 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8446 HOST_SPACE_INTERNAL_CB_SZ); 8447 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8448 mutex_unlock(&hdev->mmu_lock); 8449 8450 gen_pool_destroy(hdev->internal_cb_pool); 8451 8452 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8453 hdev->internal_cb_pool_dma_addr); 8454 } 8455 8456 static int gaudi_ctx_init(struct hl_ctx *ctx) 8457 { 8458 int rc; 8459 8460 if (ctx->asid == HL_KERNEL_ASID_ID) 8461 return 0; 8462 8463 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); 8464 if (rc) 8465 return rc; 8466 8467 rc = gaudi_restore_user_registers(ctx->hdev); 8468 if (rc) 8469 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8470 8471 return rc; 8472 } 8473 8474 static void gaudi_ctx_fini(struct hl_ctx *ctx) 8475 { 8476 if (ctx->asid == HL_KERNEL_ASID_ID) 8477 return; 8478 8479 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8480 } 8481 8482 static int gaudi_pre_schedule_cs(struct hl_cs *cs) 8483 { 8484 return 0; 8485 } 8486 8487 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 8488 { 8489 return gaudi_cq_assignment[cq_idx]; 8490 } 8491 8492 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) 8493 { 8494 return sizeof(struct packet_msg_short) + 8495 sizeof(struct packet_msg_prot) * 2; 8496 } 8497 8498 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) 8499 { 8500 return sizeof(struct packet_msg_short) * 4 + 8501 sizeof(struct packet_fence) + 8502 sizeof(struct packet_msg_prot) * 2; 8503 } 8504 8505 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) 8506 { 8507 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); 8508 } 8509 8510 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void 
*data, u16 sob_id, 8511 u32 size, bool eb) 8512 { 8513 struct hl_cb *cb = (struct hl_cb *) data; 8514 struct packet_msg_short *pkt; 8515 u32 value, ctl, pkt_size = sizeof(*pkt); 8516 8517 pkt = cb->kernel_address + size; 8518 memset(pkt, 0, pkt_size); 8519 8520 /* Inc by 1, Mode ADD */ 8521 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 8522 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 8523 8524 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 8525 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8526 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ 8527 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8528 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb); 8529 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8530 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8531 8532 pkt->value = cpu_to_le32(value); 8533 pkt->ctl = cpu_to_le32(ctl); 8534 8535 return size + pkt_size; 8536 } 8537 8538 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, 8539 u16 addr) 8540 { 8541 u32 ctl, pkt_size = sizeof(*pkt); 8542 8543 memset(pkt, 0, pkt_size); 8544 8545 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); 8546 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8547 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8548 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8549 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8550 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */ 8551 8552 pkt->value = cpu_to_le32(value); 8553 pkt->ctl = cpu_to_le32(ctl); 8554 8555 return pkt_size; 8556 } 8557 8558 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev, 8559 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask, 8560 u16 sob_val, u16 mon_id) 8561 { 8562 u64 monitor_base; 8563 u32 ctl, value, pkt_size = sizeof(*pkt); 8564 u16 msg_addr_offset; 8565 u8 mask; 8566 8567 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 8568 dev_err(hdev->dev, 8569 "sob_base %u (mask %#x) is not valid\n", 8570 sob_base, sob_mask); 8571 return 0; 8572 } 8573 8574 /* 8575 * monitor_base should be the content of the base0 address registers, 8576 * so it will be added to the msg short offsets 8577 */ 8578 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8579 8580 msg_addr_offset = 8581 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - 8582 monitor_base; 8583 8584 memset(pkt, 0, pkt_size); 8585 8586 /* Monitor config packet: bind the monitor to a sync object */ 8587 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 8588 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 8589 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, 8590 0); /* GREATER OR EQUAL*/ 8591 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); 8592 8593 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset); 8594 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8595 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8596 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8597 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8598 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8599 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8600 8601 pkt->value = cpu_to_le32(value); 8602 pkt->ctl = cpu_to_le32(ctl); 8603 8604 return pkt_size; 8605 } 8606 8607 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) 8608 { 8609 u32 ctl, cfg, pkt_size = sizeof(*pkt); 8610 8611 
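/* Build a FENCE packet that waits for fence counter ID 2 to reach a target value of 1 and then decrements it by 1 (see the CFG_DEC_VAL/TARGET_VAL/ID fields below) */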
memset(pkt, 0, pkt_size); 8612 8613 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 8614 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 8615 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2); 8616 8617 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 8618 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8619 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8620 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8621 8622 pkt->cfg = cpu_to_le32(cfg); 8623 pkt->ctl = cpu_to_le32(ctl); 8624 8625 return pkt_size; 8626 } 8627 8628 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr) 8629 { 8630 u32 offset, nic_index; 8631 8632 switch (queue_id) { 8633 case GAUDI_QUEUE_ID_DMA_0_0: 8634 offset = mmDMA0_QM_CP_FENCE2_RDATA_0; 8635 break; 8636 case GAUDI_QUEUE_ID_DMA_0_1: 8637 offset = mmDMA0_QM_CP_FENCE2_RDATA_1; 8638 break; 8639 case GAUDI_QUEUE_ID_DMA_0_2: 8640 offset = mmDMA0_QM_CP_FENCE2_RDATA_2; 8641 break; 8642 case GAUDI_QUEUE_ID_DMA_0_3: 8643 offset = mmDMA0_QM_CP_FENCE2_RDATA_3; 8644 break; 8645 case GAUDI_QUEUE_ID_DMA_1_0: 8646 offset = mmDMA1_QM_CP_FENCE2_RDATA_0; 8647 break; 8648 case GAUDI_QUEUE_ID_DMA_1_1: 8649 offset = mmDMA1_QM_CP_FENCE2_RDATA_1; 8650 break; 8651 case GAUDI_QUEUE_ID_DMA_1_2: 8652 offset = mmDMA1_QM_CP_FENCE2_RDATA_2; 8653 break; 8654 case GAUDI_QUEUE_ID_DMA_1_3: 8655 offset = mmDMA1_QM_CP_FENCE2_RDATA_3; 8656 break; 8657 case GAUDI_QUEUE_ID_DMA_5_0: 8658 offset = mmDMA5_QM_CP_FENCE2_RDATA_0; 8659 break; 8660 case GAUDI_QUEUE_ID_DMA_5_1: 8661 offset = mmDMA5_QM_CP_FENCE2_RDATA_1; 8662 break; 8663 case GAUDI_QUEUE_ID_DMA_5_2: 8664 offset = mmDMA5_QM_CP_FENCE2_RDATA_2; 8665 break; 8666 case GAUDI_QUEUE_ID_DMA_5_3: 8667 offset = mmDMA5_QM_CP_FENCE2_RDATA_3; 8668 break; 8669 case GAUDI_QUEUE_ID_TPC_7_0: 8670 offset = mmTPC7_QM_CP_FENCE2_RDATA_0; 8671 break; 8672 case GAUDI_QUEUE_ID_TPC_7_1: 8673 offset = mmTPC7_QM_CP_FENCE2_RDATA_1; 8674 break; 8675 case GAUDI_QUEUE_ID_TPC_7_2: 8676 offset = mmTPC7_QM_CP_FENCE2_RDATA_2; 8677 break; 8678 case GAUDI_QUEUE_ID_TPC_7_3: 8679 offset = mmTPC7_QM_CP_FENCE2_RDATA_3; 8680 break; 8681 case GAUDI_QUEUE_ID_NIC_0_0: 8682 case GAUDI_QUEUE_ID_NIC_1_0: 8683 case GAUDI_QUEUE_ID_NIC_2_0: 8684 case GAUDI_QUEUE_ID_NIC_3_0: 8685 case GAUDI_QUEUE_ID_NIC_4_0: 8686 case GAUDI_QUEUE_ID_NIC_5_0: 8687 case GAUDI_QUEUE_ID_NIC_6_0: 8688 case GAUDI_QUEUE_ID_NIC_7_0: 8689 case GAUDI_QUEUE_ID_NIC_8_0: 8690 case GAUDI_QUEUE_ID_NIC_9_0: 8691 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2; 8692 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 + 8693 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8694 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8695 break; 8696 case GAUDI_QUEUE_ID_NIC_0_1: 8697 case GAUDI_QUEUE_ID_NIC_1_1: 8698 case GAUDI_QUEUE_ID_NIC_2_1: 8699 case GAUDI_QUEUE_ID_NIC_3_1: 8700 case GAUDI_QUEUE_ID_NIC_4_1: 8701 case GAUDI_QUEUE_ID_NIC_5_1: 8702 case GAUDI_QUEUE_ID_NIC_6_1: 8703 case GAUDI_QUEUE_ID_NIC_7_1: 8704 case GAUDI_QUEUE_ID_NIC_8_1: 8705 case GAUDI_QUEUE_ID_NIC_9_1: 8706 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2; 8707 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 + 8708 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8709 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8710 break; 8711 case GAUDI_QUEUE_ID_NIC_0_2: 8712 case GAUDI_QUEUE_ID_NIC_1_2: 8713 case GAUDI_QUEUE_ID_NIC_2_2: 8714 case GAUDI_QUEUE_ID_NIC_3_2: 8715 case GAUDI_QUEUE_ID_NIC_4_2: 8716 case GAUDI_QUEUE_ID_NIC_5_2: 8717 case GAUDI_QUEUE_ID_NIC_6_2: 8718 case GAUDI_QUEUE_ID_NIC_7_2: 8719 case GAUDI_QUEUE_ID_NIC_8_2: 8720 case 
GAUDI_QUEUE_ID_NIC_9_2: 8721 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2; 8722 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 + 8723 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8724 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8725 break; 8726 case GAUDI_QUEUE_ID_NIC_0_3: 8727 case GAUDI_QUEUE_ID_NIC_1_3: 8728 case GAUDI_QUEUE_ID_NIC_2_3: 8729 case GAUDI_QUEUE_ID_NIC_3_3: 8730 case GAUDI_QUEUE_ID_NIC_4_3: 8731 case GAUDI_QUEUE_ID_NIC_5_3: 8732 case GAUDI_QUEUE_ID_NIC_6_3: 8733 case GAUDI_QUEUE_ID_NIC_7_3: 8734 case GAUDI_QUEUE_ID_NIC_8_3: 8735 case GAUDI_QUEUE_ID_NIC_9_3: 8736 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2; 8737 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 + 8738 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8739 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8740 break; 8741 default: 8742 return -EINVAL; 8743 } 8744 8745 *addr = CFG_BASE + offset; 8746 8747 return 0; 8748 } 8749 8750 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr) 8751 { 8752 u64 monitor_base; 8753 u32 size = 0; 8754 u16 msg_addr_offset; 8755 8756 /* 8757 * monitor_base should be the content of the base0 address registers, 8758 * so it will be added to the msg short offsets 8759 */ 8760 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8761 8762 /* First monitor config packet: low address of the sync */ 8763 msg_addr_offset = 8764 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) - 8765 monitor_base; 8766 8767 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr, 8768 msg_addr_offset); 8769 8770 /* Second monitor config packet: high address of the sync */ 8771 msg_addr_offset = 8772 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) - 8773 monitor_base; 8774 8775 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), 8776 msg_addr_offset); 8777 8778 /* 8779 * Third monitor config packet: the payload, i.e. 
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* The register value is a partial address of the sync object and is
	 * used as a unique identifier, so the cfg base bits must be stripped
	 * from it.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}
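/*
 * Illustrative example: if an engine's SO configuration register reads back
 * lower_32_bits(CFG_BASE) plus the offset of some sync object, then after
 * the subtraction above sync_id holds that CFG-relative offset. The entry is
 * hashed under this key, so a sync object observed in a state dump can be
 * traced back to the engine that uses it.
 */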
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}
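/*
 * Worked example (illustration only): with arm_data decoding to group id 3
 * and mask 0b11110101, bits 1 and 3 are cleared, so the monitored sync
 * objects are 3 * MONITOR_MAX_SOBS + 1 and 3 * MONITOR_MAX_SOBS + 3, and the
 * resulting string is "25, 27" (assuming MONITOR_MAX_SOBS is 8, matching the
 * "8 * group_id" formula in the comment above).
 */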
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
					struct hl_device *hdev,
					struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}

static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}
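/*
 * Address arithmetic above, spelled out (illustrative interpretation):
 * base_offset is taken to be the CFG-relative offset of the engine's
 * CP_FENCE0_CNT_0 register, so for stream i and fence id f the counter
 * register is at CFG_BASE + base_offset + 4 * (i + f * number_of_queues),
 * and the matching RDATA register is reached by swapping the FENCE0_CNT
 * offset for the FENCE0_RDATA offset from the state dump properties.
 */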
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
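/*
 * ASIC function table that plugs the Gaudi-specific implementations above
 * into the common habanalabs core; the core is expected to call through it
 * as, e.g., hdev->asic_funcs->hw_init(hdev) (illustrative call, the actual
 * call sites live in the common code). Callbacks left NULL are features this
 * ASIC does not provide.
 */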
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}
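/*
 * Illustrative sketch of how this table is expected to be selected during
 * probe; the actual call site lives in the common habanalabs code, not in
 * this file:
 *
 *	switch (hdev->asic_type) {
 *	case ASIC_GAUDI:
 *		gaudi_set_asic_funcs(hdev);
 *		break;
 *	...
 *	}
 */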