1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2016-2022 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8 #include "gaudiP.h" 9 #include "../include/hw_ip/mmu/mmu_general.h" 10 #include "../include/hw_ip/mmu/mmu_v1_1.h" 11 #include "../include/gaudi/gaudi_masks.h" 12 #include "../include/gaudi/gaudi_fw_if.h" 13 #include "../include/gaudi/gaudi_reg_map.h" 14 #include "../include/gaudi/gaudi_async_ids_map_extended.h" 15 16 #include <linux/module.h> 17 #include <linux/pci.h> 18 #include <linux/firmware.h> 19 #include <linux/hwmon.h> 20 #include <linux/iommu.h> 21 #include <linux/seq_file.h> 22 23 /* 24 * Gaudi security scheme: 25 * 26 * 1. Host is protected by: 27 * - Range registers 28 * - MMU 29 * 30 * 2. DDR is protected by: 31 * - Range registers (protect the first 512MB) 32 * 33 * 3. Configuration is protected by: 34 * - Range registers 35 * - Protection bits 36 * 37 * MMU is always enabled. 38 * 39 * QMAN DMA channels 0,1 (PCI DMAN): 40 * - DMA is not secured. 41 * - PQ and CQ are secured. 42 * - CP is secured: The driver needs to parse CB but WREG should be allowed 43 * because of TDMA (tensor DMA). Hence, WREG is always not 44 * secured. 45 * 46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA 47 * channel 0 to be secured, execute the DMA and change it back to not secured. 48 * Currently, the driver doesn't use the DMA while there are compute jobs 49 * running. 
50 * 51 * The current use cases for the driver to use the DMA are: 52 * - Clear SRAM on context switch (happens on context switch when device is 53 * idle) 54 * - MMU page tables area clear (happens on init) 55 * 56 * QMAN DMA 2-7, TPC, MME, NIC: 57 * PQ is secured and is located on the Host (HBM CON TPC3 bug) 58 * CQ, CP and the engine are not secured 59 * 60 */ 61 62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb" 63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb" 64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin" 65 66 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ 67 68 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */ 69 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */ 70 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */ 71 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ 72 73 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ 74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */ 75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */ 76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) 77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) 78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) 79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */ 80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ 81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */ 82 83 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9 84 85 #define GAUDI_MAX_STRING_LEN 20 86 87 #define GAUDI_CB_POOL_CB_CNT 512 88 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */ 89 90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3 91 92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20 93 94 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16 95 96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3 97 98 #define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */ 99 100 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */ 101 102 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010") 103 104 #define 
/*
 * Queue IDs of the stream master queues: the four streams of DMA 0 followed
 * by the four streams of DMA 1.
 */
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

/*
 * Names of the MSI interrupts, one per MSI entry. Each name, including its
 * terminating NUL, must fit in GAUDI_MAX_STRING_LEN bytes.
 */
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

/* Maps each logical DMA channel (PCI / HBM) to the DMA engine ID serving it */
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

/* Maps each completion queue index to the H/W queue ID associated with it */
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};
[PACKET_NOP] = sizeof(struct packet_nop), 157 [PACKET_STOP] = sizeof(struct packet_stop), 158 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point), 159 [PACKET_WAIT] = sizeof(struct packet_wait), 160 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe) 161 }; 162 163 static inline bool validate_packet_id(enum packet_id id) 164 { 165 switch (id) { 166 case PACKET_WREG_32: 167 case PACKET_WREG_BULK: 168 case PACKET_MSG_LONG: 169 case PACKET_MSG_SHORT: 170 case PACKET_CP_DMA: 171 case PACKET_REPEAT: 172 case PACKET_MSG_PROT: 173 case PACKET_FENCE: 174 case PACKET_LIN_DMA: 175 case PACKET_NOP: 176 case PACKET_STOP: 177 case PACKET_ARB_POINT: 178 case PACKET_WAIT: 179 case PACKET_LOAD_AND_EXE: 180 return true; 181 default: 182 return false; 183 } 184 } 185 186 static const char * const 187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = { 188 "tpc_address_exceed_slm", 189 "tpc_div_by_0", 190 "tpc_spu_mac_overflow", 191 "tpc_spu_addsub_overflow", 192 "tpc_spu_abs_overflow", 193 "tpc_spu_fp_dst_nan_inf", 194 "tpc_spu_fp_dst_denorm", 195 "tpc_vpu_mac_overflow", 196 "tpc_vpu_addsub_overflow", 197 "tpc_vpu_abs_overflow", 198 "tpc_vpu_fp_dst_nan_inf", 199 "tpc_vpu_fp_dst_denorm", 200 "tpc_assertions", 201 "tpc_illegal_instruction", 202 "tpc_pc_wrap_around", 203 "tpc_qm_sw_err", 204 "tpc_hbw_rresp_err", 205 "tpc_hbw_bresp_err", 206 "tpc_lbw_rresp_err", 207 "tpc_lbw_bresp_err" 208 }; 209 210 static const char * const 211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = { 212 "PQ AXI HBW error", 213 "CQ AXI HBW error", 214 "CP AXI HBW error", 215 "CP error due to undefined OPCODE", 216 "CP encountered STOP OPCODE", 217 "CP AXI LBW error", 218 "CP WRREG32 or WRBULK returned error", 219 "N/A", 220 "FENCE 0 inc over max value and clipped", 221 "FENCE 1 inc over max value and clipped", 222 "FENCE 2 inc over max value and clipped", 223 "FENCE 3 inc over max value and clipped", 224 "FENCE 0 dec under min value and clipped", 225 "FENCE 1 dec under min value and 
/*
 * Human-readable descriptions of the QMAN arbiter error causes.
 * NOTE(review): presumably indexed by the bit position in the arbiter error
 * cause register - confirm against the code that reads it.
 */
static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};
GAUDI_QUEUE_ID_DMA_7_3 */ 271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ 272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ 273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ 274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ 275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */ 276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ 277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ 278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ 279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ 280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ 281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ 282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ 283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ 284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ 285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ 286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ 287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ 288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ 289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */ 290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ 291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ 292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ 293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ 294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ 295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ 296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ 297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ 298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ 299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */ 300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */ 301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */ 302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */ 303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */ 304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */ 305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */ 306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */ 307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */ 308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */ 309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */ 310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */ 311 
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */ 312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */ 313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */ 314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */ 315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */ 316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */ 317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */ 318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */ 319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */ 320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */ 321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */ 322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */ 323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */ 324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */ 325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */ 326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */ 327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */ 328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */ 329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */ 330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */ 331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */ 332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */ 333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */ 334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */ 335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */ 336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */ 337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */ 338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */ 339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */ 340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */ 341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */ 342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */ 343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */ 344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */ 345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */ 346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */ 347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */ 348 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */ 349 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */ 350 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */ 351 }; 352 353 static struct 
/*
 * Names of the known monitor objects, keyed by monitor ID. The table covers
 * IDs 200, 201 and 203-211; there is no entry for ID 202.
 */
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};
/*
 * Maps each H/W queue ID to the ID of the engine it belongs to. Every engine
 * owns a range of four consecutive queue IDs (streams _0 to _3), so a single
 * engine ID is assigned to the whole range.
 *
 * Two entries do not follow the straightforward numbering:
 * - GAUDI_QUEUE_ID_CPU_PQ maps to GAUDI_ENGINE_ID_SIZE rather than to a
 *   regular engine ID (presumably a "no engine" marker - TODO confirm).
 * - The MME_1 queues map to GAUDI_ENGINE_ID_MME_2
 *   (NOTE(review): looks intentional - confirm against the engine ID enum).
 */
static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};
GAUDI_QUEUE_ID_DMA_5_0 && 507 queue_id <= GAUDI_QUEUE_ID_DMA_5_3) 508 return HL_COLLECTIVE_SLAVE; 509 510 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 && 511 queue_id <= GAUDI_QUEUE_ID_TPC_7_3) 512 return HL_COLLECTIVE_SLAVE; 513 514 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 && 515 queue_id <= GAUDI_QUEUE_ID_NIC_9_3) 516 return HL_COLLECTIVE_SLAVE; 517 518 return HL_COLLECTIVE_NOT_SUPPORTED; 519 } 520 521 static inline void set_default_power_values(struct hl_device *hdev) 522 { 523 struct asic_fixed_properties *prop = &hdev->asic_prop; 524 525 if (hdev->card_type == cpucp_card_type_pmc) { 526 prop->max_power_default = MAX_POWER_DEFAULT_PMC; 527 528 if (prop->fw_security_enabled) 529 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC; 530 else 531 prop->dc_power_default = DC_POWER_DEFAULT_PMC; 532 } else { 533 prop->max_power_default = MAX_POWER_DEFAULT_PCI; 534 prop->dc_power_default = DC_POWER_DEFAULT_PCI; 535 } 536 } 537 538 static int gaudi_set_fixed_properties(struct hl_device *hdev) 539 { 540 struct asic_fixed_properties *prop = &hdev->asic_prop; 541 u32 num_sync_stream_queues = 0; 542 int i; 543 544 prop->max_queues = GAUDI_QUEUE_ID_SIZE; 545 prop->hw_queues_props = kcalloc(prop->max_queues, 546 sizeof(struct hw_queue_properties), 547 GFP_KERNEL); 548 549 if (!prop->hw_queues_props) 550 return -ENOMEM; 551 552 for (i = 0 ; i < prop->max_queues ; i++) { 553 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) { 554 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; 555 prop->hw_queues_props[i].driver_only = 0; 556 prop->hw_queues_props[i].supports_sync_stream = 1; 557 prop->hw_queues_props[i].cb_alloc_flags = 558 CB_ALLOC_KERNEL; 559 num_sync_stream_queues++; 560 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) { 561 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; 562 prop->hw_queues_props[i].driver_only = 1; 563 prop->hw_queues_props[i].supports_sync_stream = 0; 564 prop->hw_queues_props[i].cb_alloc_flags = 565 CB_ALLOC_KERNEL; 566 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) 
{ 567 prop->hw_queues_props[i].type = QUEUE_TYPE_INT; 568 prop->hw_queues_props[i].driver_only = 0; 569 prop->hw_queues_props[i].supports_sync_stream = 0; 570 prop->hw_queues_props[i].cb_alloc_flags = 571 CB_ALLOC_USER; 572 573 } 574 prop->hw_queues_props[i].collective_mode = 575 get_collective_mode(hdev, i); 576 } 577 578 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE; 579 prop->cfg_base_address = CFG_BASE; 580 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE; 581 prop->host_base_address = HOST_PHYS_BASE; 582 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE; 583 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; 584 prop->completion_mode = HL_COMPLETION_MODE_JOB; 585 prop->collective_first_sob = 0; 586 prop->collective_first_mon = 0; 587 588 /* 2 SOBs per internal queue stream are reserved for collective */ 589 prop->sync_stream_first_sob = 590 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR) 591 * QMAN_STREAMS * HL_RSVD_SOBS; 592 593 /* 1 monitor per internal queue stream are reserved for collective 594 * 2 monitors per external queue stream are reserved for collective 595 */ 596 prop->sync_stream_first_mon = 597 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) + 598 (NUMBER_OF_EXT_HW_QUEUES * 2); 599 600 prop->dram_base_address = DRAM_PHYS_BASE; 601 prop->dram_size = GAUDI_HBM_SIZE_32GB; 602 prop->dram_end_address = prop->dram_base_address + prop->dram_size; 603 prop->dram_user_base_address = DRAM_BASE_ADDR_USER; 604 605 prop->sram_base_address = SRAM_BASE_ADDR; 606 prop->sram_size = SRAM_SIZE; 607 prop->sram_end_address = prop->sram_base_address + prop->sram_size; 608 prop->sram_user_base_address = 609 prop->sram_base_address + SRAM_USER_BASE_OFFSET; 610 611 prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR; 612 prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE; 613 614 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR; 615 if (hdev->pldm) 616 prop->mmu_pgt_size = 0x800000; /* 8MB */ 617 else 618 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; 619 
prop->mmu_pte_size = HL_PTE_SIZE; 620 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; 621 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; 622 prop->dram_page_size = PAGE_SIZE_2MB; 623 prop->device_mem_alloc_default_page_size = prop->dram_page_size; 624 prop->dram_supports_virtual_memory = false; 625 626 prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT; 627 prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT; 628 prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT; 629 prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT; 630 prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT; 631 prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK; 632 prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK; 633 prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK; 634 prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK; 635 prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK; 636 prop->pmmu.start_addr = VA_HOST_SPACE_START; 637 prop->pmmu.end_addr = 638 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; 639 prop->pmmu.page_size = PAGE_SIZE_4KB; 640 prop->pmmu.num_hops = MMU_ARCH_5_HOPS; 641 prop->pmmu.last_mask = LAST_MASK; 642 /* TODO: will be duplicated until implementing per-MMU props */ 643 prop->pmmu.hop_table_size = prop->mmu_hop_table_size; 644 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 645 646 /* PMMU and HPMMU are the same except of page size */ 647 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 648 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 649 650 /* shifts and masks are the same in PMMU and DMMU */ 651 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu)); 652 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2); 653 prop->dmmu.end_addr = VA_HOST_SPACE_END; 654 prop->dmmu.page_size = PAGE_SIZE_2MB; 655 656 prop->cfg_size = CFG_SIZE; 657 prop->max_asid = MAX_ASID; 658 prop->num_of_events = GAUDI_EVENT_SIZE; 659 prop->tpc_enabled_mask = TPC_ENABLED_MASK; 660 661 
set_default_power_values(hdev); 662 663 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT; 664 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; 665 666 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; 667 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 668 669 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 670 CARD_NAME_MAX_LEN); 671 672 prop->max_pending_cs = GAUDI_MAX_PENDING_CS; 673 674 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] = 675 prop->sync_stream_first_sob + 676 (num_sync_stream_queues * HL_RSVD_SOBS); 677 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] = 678 prop->sync_stream_first_mon + 679 (num_sync_stream_queues * HL_RSVD_MONS); 680 681 prop->first_available_user_interrupt = USHRT_MAX; 682 683 for (i = 0 ; i < HL_MAX_DCORES ; i++) 684 prop->first_available_cq[i] = USHRT_MAX; 685 686 prop->fw_cpu_boot_dev_sts0_valid = false; 687 prop->fw_cpu_boot_dev_sts1_valid = false; 688 prop->hard_reset_done_by_fw = false; 689 prop->gic_interrupts_enable = true; 690 691 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 692 693 prop->clk_pll_index = HL_GAUDI_MME_PLL; 694 prop->max_freq_value = GAUDI_MAX_CLK_FREQ; 695 696 prop->use_get_power_for_reset_history = true; 697 698 prop->configurable_stop_on_err = true; 699 700 prop->set_max_power_on_device_init = true; 701 702 prop->dma_mask = 48; 703 704 prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL; 705 706 return 0; 707 } 708 709 static int gaudi_pci_bars_map(struct hl_device *hdev) 710 { 711 static const char * const name[] = {"SRAM", "CFG", "HBM"}; 712 bool is_wc[3] = {false, false, true}; 713 int rc; 714 715 rc = hl_pci_bars_map(hdev, name, is_wc); 716 if (rc) 717 return rc; 718 719 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] + 720 (CFG_BASE - SPI_FLASH_BASE_ADDR); 721 722 return 0; 723 } 724 725 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 726 { 727 struct gaudi_device *gaudi = hdev->asic_specific; 728 struct hl_inbound_pci_region pci_region; 729 u64 old_addr = addr; 
730 int rc; 731 732 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr)) 733 return old_addr; 734 735 if (hdev->asic_prop.iatu_done_by_fw) 736 return U64_MAX; 737 738 /* Inbound Region 2 - Bar 4 - Point to HBM */ 739 pci_region.mode = PCI_BAR_MATCH_MODE; 740 pci_region.bar = HBM_BAR_ID; 741 pci_region.addr = addr; 742 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 743 if (rc) 744 return U64_MAX; 745 746 if (gaudi) { 747 old_addr = gaudi->hbm_bar_cur_addr; 748 gaudi->hbm_bar_cur_addr = addr; 749 } 750 751 return old_addr; 752 } 753 754 static int gaudi_init_iatu(struct hl_device *hdev) 755 { 756 struct hl_inbound_pci_region inbound_region; 757 struct hl_outbound_pci_region outbound_region; 758 int rc; 759 760 if (hdev->asic_prop.iatu_done_by_fw) 761 return 0; 762 763 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */ 764 inbound_region.mode = PCI_BAR_MATCH_MODE; 765 inbound_region.bar = SRAM_BAR_ID; 766 inbound_region.addr = SRAM_BASE_ADDR; 767 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 768 if (rc) 769 goto done; 770 771 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */ 772 inbound_region.mode = PCI_BAR_MATCH_MODE; 773 inbound_region.bar = CFG_BAR_ID; 774 inbound_region.addr = SPI_FLASH_BASE_ADDR; 775 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 776 if (rc) 777 goto done; 778 779 /* Inbound Region 2 - Bar 4 - Point to HBM */ 780 inbound_region.mode = PCI_BAR_MATCH_MODE; 781 inbound_region.bar = HBM_BAR_ID; 782 inbound_region.addr = DRAM_PHYS_BASE; 783 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 784 if (rc) 785 goto done; 786 787 /* Outbound Region 0 - Point to Host */ 788 outbound_region.addr = HOST_PHYS_BASE; 789 outbound_region.size = HOST_PHYS_SIZE; 790 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 791 792 done: 793 return rc; 794 } 795 796 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev) 797 { 798 return RREG32(mmHW_STATE); 799 } 800 801 static int gaudi_early_init(struct 
hl_device *hdev) 802 { 803 struct asic_fixed_properties *prop = &hdev->asic_prop; 804 struct pci_dev *pdev = hdev->pdev; 805 resource_size_t pci_bar_size; 806 u32 fw_boot_status; 807 int rc; 808 809 rc = gaudi_set_fixed_properties(hdev); 810 if (rc) { 811 dev_err(hdev->dev, "Failed setting fixed properties\n"); 812 return rc; 813 } 814 815 /* Check BAR sizes */ 816 pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID); 817 818 if (pci_bar_size != SRAM_BAR_SIZE) { 819 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 820 SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE); 821 rc = -ENODEV; 822 goto free_queue_props; 823 } 824 825 pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID); 826 827 if (pci_bar_size != CFG_BAR_SIZE) { 828 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 829 CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE); 830 rc = -ENODEV; 831 goto free_queue_props; 832 } 833 834 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); 835 hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID); 836 837 /* If FW security is enabled at this point it means no access to ELBI */ 838 if (hdev->asic_prop.fw_security_enabled) { 839 hdev->asic_prop.iatu_done_by_fw = true; 840 841 /* 842 * GIC-security-bit can ONLY be set by CPUCP, so in this stage 843 * decision can only be taken based on PCI ID security. 
844 */ 845 hdev->asic_prop.gic_interrupts_enable = false; 846 goto pci_init; 847 } 848 849 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, 850 &fw_boot_status); 851 if (rc) 852 goto free_queue_props; 853 854 /* Check whether FW is configuring iATU */ 855 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) && 856 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN)) 857 hdev->asic_prop.iatu_done_by_fw = true; 858 859 pci_init: 860 rc = hl_pci_init(hdev); 861 if (rc) 862 goto free_queue_props; 863 864 /* Before continuing in the initialization, we need to read the preboot 865 * version to determine whether we run with a security-enabled firmware 866 */ 867 rc = hl_fw_read_preboot_status(hdev); 868 if (rc) { 869 if (hdev->reset_on_preboot_fail) 870 hdev->asic_funcs->hw_fini(hdev, true, false); 871 goto pci_fini; 872 } 873 874 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { 875 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); 876 hdev->asic_funcs->hw_fini(hdev, true, false); 877 } 878 879 return 0; 880 881 pci_fini: 882 hl_pci_fini(hdev); 883 free_queue_props: 884 kfree(hdev->asic_prop.hw_queues_props); 885 return rc; 886 } 887 888 static int gaudi_early_fini(struct hl_device *hdev) 889 { 890 kfree(hdev->asic_prop.hw_queues_props); 891 hl_pci_fini(hdev); 892 893 return 0; 894 } 895 896 /** 897 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values 898 * 899 * @hdev: pointer to hl_device structure 900 * 901 */ 902 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) 903 { 904 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; 905 struct asic_fixed_properties *prop = &hdev->asic_prop; 906 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; 907 int rc; 908 909 if ((hdev->fw_components & FW_TYPE_LINUX) && 910 (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) { 911 struct gaudi_device *gaudi = hdev->asic_specific; 912 913 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 914 return 0; 915 916 rc = 
hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr); 917 918 if (rc) 919 return rc; 920 921 freq = pll_freq_arr[2]; 922 } else { 923 /* Backward compatibility */ 924 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2); 925 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2); 926 nr = RREG32(mmPSOC_CPU_PLL_NR); 927 nf = RREG32(mmPSOC_CPU_PLL_NF); 928 od = RREG32(mmPSOC_CPU_PLL_OD); 929 930 if (div_sel == DIV_SEL_REF_CLK || 931 div_sel == DIV_SEL_DIVIDED_REF) { 932 if (div_sel == DIV_SEL_REF_CLK) 933 freq = PLL_REF_CLK; 934 else 935 freq = PLL_REF_CLK / (div_fctr + 1); 936 } else if (div_sel == DIV_SEL_PLL_CLK || 937 div_sel == DIV_SEL_DIVIDED_PLL) { 938 pll_clk = PLL_REF_CLK * (nf + 1) / 939 ((nr + 1) * (od + 1)); 940 if (div_sel == DIV_SEL_PLL_CLK) 941 freq = pll_clk; 942 else 943 freq = pll_clk / (div_fctr + 1); 944 } else { 945 dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel); 946 freq = 0; 947 } 948 } 949 950 prop->psoc_timestamp_frequency = freq; 951 prop->psoc_pci_pll_nr = nr; 952 prop->psoc_pci_pll_nf = nf; 953 prop->psoc_pci_pll_od = od; 954 prop->psoc_pci_pll_div_factor = div_fctr; 955 956 return 0; 957 } 958 959 static int _gaudi_init_tpc_mem(struct hl_device *hdev, 960 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size) 961 { 962 struct asic_fixed_properties *prop = &hdev->asic_prop; 963 struct packet_lin_dma *init_tpc_mem_pkt; 964 struct hl_cs_job *job; 965 struct hl_cb *cb; 966 u64 dst_addr; 967 u32 cb_size, ctl; 968 u8 tpc_id; 969 int rc; 970 971 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 972 if (!cb) 973 return -EFAULT; 974 975 init_tpc_mem_pkt = cb->kernel_address; 976 cb_size = sizeof(*init_tpc_mem_pkt); 977 memset(init_tpc_mem_pkt, 0, cb_size); 978 979 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size); 980 981 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 982 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 983 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 984 ctl |= 
FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 985 986 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl); 987 988 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr); 989 990 /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */ 991 dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK, 992 round_up(prop->sram_user_base_address, SZ_8K)); 993 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr); 994 995 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 996 if (!job) { 997 dev_err(hdev->dev, "Failed to allocate a new job\n"); 998 rc = -ENOMEM; 999 goto release_cb; 1000 } 1001 1002 job->id = 0; 1003 job->user_cb = cb; 1004 atomic_inc(&job->user_cb->cs_cnt); 1005 job->user_cb_size = cb_size; 1006 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 1007 job->patched_cb = job->user_cb; 1008 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 1009 1010 hl_debugfs_add_job(hdev, job); 1011 1012 rc = gaudi_send_job_on_qman0(hdev, job); 1013 1014 if (rc) 1015 goto free_job; 1016 1017 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 1018 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id); 1019 if (rc) 1020 break; 1021 } 1022 1023 free_job: 1024 hl_userptr_delete_list(hdev, &job->userptr_list); 1025 hl_debugfs_remove_job(hdev, job); 1026 kfree(job); 1027 atomic_dec(&cb->cs_cnt); 1028 1029 release_cb: 1030 hl_cb_put(cb); 1031 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 1032 1033 return rc; 1034 } 1035 1036 /* 1037 * gaudi_init_tpc_mem() - Initialize TPC memories. 1038 * @hdev: Pointer to hl_device structure. 1039 * 1040 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories. 1041 * 1042 * Return: 0 for success, negative value for error. 
1043 */ 1044 static int gaudi_init_tpc_mem(struct hl_device *hdev) 1045 { 1046 const struct firmware *fw; 1047 size_t fw_size; 1048 void *cpu_addr; 1049 dma_addr_t dma_handle; 1050 int rc, count = 5; 1051 1052 again: 1053 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); 1054 if (rc == -EINTR && count-- > 0) { 1055 msleep(50); 1056 goto again; 1057 } 1058 1059 if (rc) { 1060 dev_err(hdev->dev, "Failed to load firmware file %s\n", 1061 GAUDI_TPC_FW_FILE); 1062 goto out; 1063 } 1064 1065 fw_size = fw->size; 1066 cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO); 1067 if (!cpu_addr) { 1068 dev_err(hdev->dev, 1069 "Failed to allocate %zu of dma memory for TPC kernel\n", 1070 fw_size); 1071 rc = -ENOMEM; 1072 goto out; 1073 } 1074 1075 memcpy(cpu_addr, fw->data, fw_size); 1076 1077 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size); 1078 1079 hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle); 1080 1081 out: 1082 release_firmware(fw); 1083 return rc; 1084 } 1085 1086 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream) 1087 { 1088 struct gaudi_device *gaudi = hdev->asic_specific; 1089 struct gaudi_collective_properties *prop = &gaudi->collective_props; 1090 struct hl_hw_queue *q; 1091 u32 i, sob_id, sob_group_id, queue_id; 1092 1093 /* Iterate through SOB groups and assign a SOB for each slave queue */ 1094 sob_group_id = 1095 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream]; 1096 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id; 1097 1098 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream; 1099 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 1100 q = &hdev->kernel_queues[queue_id + (4 * i)]; 1101 q->sync_stream_prop.collective_sob_id = sob_id + i; 1102 } 1103 1104 /* Both DMA5 and TPC7 use the same resources since only a single 1105 * engine need to participate in the reduction process 1106 */ 1107 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream; 1108 q = &hdev->kernel_queues[queue_id]; 
1109 q->sync_stream_prop.collective_sob_id = 1110 sob_id + NIC_NUMBER_OF_ENGINES; 1111 1112 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream; 1113 q = &hdev->kernel_queues[queue_id]; 1114 q->sync_stream_prop.collective_sob_id = 1115 sob_id + NIC_NUMBER_OF_ENGINES; 1116 } 1117 1118 static void gaudi_sob_group_hw_reset(struct kref *ref) 1119 { 1120 struct gaudi_hw_sob_group *hw_sob_group = 1121 container_of(ref, struct gaudi_hw_sob_group, kref); 1122 struct hl_device *hdev = hw_sob_group->hdev; 1123 int i; 1124 1125 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++) 1126 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 1127 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0); 1128 1129 kref_init(&hw_sob_group->kref); 1130 } 1131 1132 static void gaudi_sob_group_reset_error(struct kref *ref) 1133 { 1134 struct gaudi_hw_sob_group *hw_sob_group = 1135 container_of(ref, struct gaudi_hw_sob_group, kref); 1136 struct hl_device *hdev = hw_sob_group->hdev; 1137 1138 dev_crit(hdev->dev, 1139 "SOB release shouldn't be called here, base_sob_id: %d\n", 1140 hw_sob_group->base_sob_id); 1141 } 1142 1143 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi) 1144 { 1145 struct gaudi_collective_properties *prop; 1146 int i; 1147 1148 prop = &gaudi->collective_props; 1149 1150 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask)); 1151 1152 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) 1153 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i)) 1154 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |= 1155 BIT(i % HL_MAX_SOBS_PER_MONITOR); 1156 /* Set collective engine bit */ 1157 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |= 1158 BIT(i % HL_MAX_SOBS_PER_MONITOR); 1159 } 1160 1161 static int gaudi_collective_init(struct hl_device *hdev) 1162 { 1163 u32 i, sob_id, reserved_sobs_per_group; 1164 struct gaudi_collective_properties *prop; 1165 struct gaudi_device *gaudi; 1166 1167 gaudi = hdev->asic_specific; 1168 prop = &gaudi->collective_props; 1169 sob_id = 
hdev->asic_prop.collective_first_sob; 1170 1171 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */ 1172 reserved_sobs_per_group = 1173 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR); 1174 1175 /* Init SOB groups */ 1176 for (i = 0 ; i < NUM_SOB_GROUPS; i++) { 1177 prop->hw_sob_group[i].hdev = hdev; 1178 prop->hw_sob_group[i].base_sob_id = sob_id; 1179 sob_id += reserved_sobs_per_group; 1180 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref); 1181 } 1182 1183 for (i = 0 ; i < QMAN_STREAMS; i++) { 1184 prop->next_sob_group_val[i] = 1; 1185 prop->curr_sob_group_idx[i] = 0; 1186 gaudi_collective_map_sobs(hdev, i); 1187 } 1188 1189 gaudi_collective_mstr_sob_mask_set(gaudi); 1190 1191 return 0; 1192 } 1193 1194 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group) 1195 { 1196 struct gaudi_device *gaudi = hdev->asic_specific; 1197 struct gaudi_collective_properties *cprop = &gaudi->collective_props; 1198 1199 kref_put(&cprop->hw_sob_group[sob_group].kref, 1200 gaudi_sob_group_hw_reset); 1201 } 1202 1203 static void gaudi_collective_master_init_job(struct hl_device *hdev, 1204 struct hl_cs_job *job, u32 stream, u32 sob_group_offset) 1205 { 1206 u32 master_sob_base, master_monitor, queue_id, cb_size = 0; 1207 struct gaudi_collective_properties *cprop; 1208 struct hl_gen_wait_properties wait_prop; 1209 struct hl_sync_stream_properties *prop; 1210 struct gaudi_device *gaudi; 1211 1212 gaudi = hdev->asic_specific; 1213 cprop = &gaudi->collective_props; 1214 queue_id = job->hw_queue_id; 1215 prop = &hdev->kernel_queues[queue_id].sync_stream_prop; 1216 1217 master_sob_base = 1218 cprop->hw_sob_group[sob_group_offset].base_sob_id; 1219 master_monitor = prop->collective_mstr_mon_id[0]; 1220 1221 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id; 1222 1223 dev_dbg(hdev->dev, 1224 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n", 1225 master_sob_base, cprop->mstr_sob_mask[0], 1226 
cprop->next_sob_group_val[stream], 1227 master_monitor, queue_id); 1228 1229 wait_prop.data = (void *) job->patched_cb; 1230 wait_prop.sob_base = master_sob_base; 1231 wait_prop.sob_mask = cprop->mstr_sob_mask[0]; 1232 wait_prop.sob_val = cprop->next_sob_group_val[stream]; 1233 wait_prop.mon_id = master_monitor; 1234 wait_prop.q_idx = queue_id; 1235 wait_prop.size = cb_size; 1236 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1237 1238 master_sob_base += HL_MAX_SOBS_PER_MONITOR; 1239 master_monitor = prop->collective_mstr_mon_id[1]; 1240 1241 dev_dbg(hdev->dev, 1242 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n", 1243 master_sob_base, cprop->mstr_sob_mask[1], 1244 cprop->next_sob_group_val[stream], 1245 master_monitor, queue_id); 1246 1247 wait_prop.sob_base = master_sob_base; 1248 wait_prop.sob_mask = cprop->mstr_sob_mask[1]; 1249 wait_prop.mon_id = master_monitor; 1250 wait_prop.size = cb_size; 1251 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1252 } 1253 1254 static void gaudi_collective_slave_init_job(struct hl_device *hdev, 1255 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl) 1256 { 1257 struct hl_gen_wait_properties wait_prop; 1258 struct hl_sync_stream_properties *prop; 1259 u32 queue_id, cb_size = 0; 1260 1261 queue_id = job->hw_queue_id; 1262 prop = &hdev->kernel_queues[queue_id].sync_stream_prop; 1263 1264 if (job->cs->encaps_signals) { 1265 /* use the encaps signal handle store earlier in the flow 1266 * and set the SOB information from the encaps 1267 * signals handle 1268 */ 1269 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job, 1270 cs_cmpl); 1271 1272 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n", 1273 job->cs->sequence, 1274 cs_cmpl->hw_sob->sob_id, 1275 cs_cmpl->sob_val); 1276 } 1277 1278 /* Add to wait CBs using slave monitor */ 1279 wait_prop.data = (void *) job->user_cb; 1280 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id; 1281 wait_prop.sob_mask = 0x1; 1282 
wait_prop.sob_val = cs_cmpl->sob_val; 1283 wait_prop.mon_id = prop->collective_slave_mon_id; 1284 wait_prop.q_idx = queue_id; 1285 wait_prop.size = cb_size; 1286 1287 dev_dbg(hdev->dev, 1288 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n", 1289 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, 1290 prop->collective_slave_mon_id, queue_id); 1291 1292 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1293 1294 dev_dbg(hdev->dev, 1295 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n", 1296 prop->collective_sob_id, queue_id); 1297 1298 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb, 1299 prop->collective_sob_id, cb_size, false); 1300 } 1301 1302 static int gaudi_collective_wait_init_cs(struct hl_cs *cs) 1303 { 1304 struct hl_cs_compl *signal_cs_cmpl = 1305 container_of(cs->signal_fence, struct hl_cs_compl, base_fence); 1306 struct hl_cs_compl *cs_cmpl = 1307 container_of(cs->fence, struct hl_cs_compl, base_fence); 1308 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; 1309 struct gaudi_collective_properties *cprop; 1310 u32 stream, queue_id, sob_group_offset; 1311 struct gaudi_device *gaudi; 1312 struct hl_device *hdev; 1313 struct hl_cs_job *job; 1314 struct hl_ctx *ctx; 1315 1316 ctx = cs->ctx; 1317 hdev = ctx->hdev; 1318 gaudi = hdev->asic_specific; 1319 cprop = &gaudi->collective_props; 1320 1321 if (cs->encaps_signals) { 1322 cs_cmpl->hw_sob = handle->hw_sob; 1323 /* at this checkpoint we only need the hw_sob pointer 1324 * for the completion check before start going over the jobs 1325 * of the master/slaves, the sob_value will be taken later on 1326 * in gaudi_collective_slave_init_job depends on each 1327 * job wait offset value. 1328 */ 1329 cs_cmpl->sob_val = 0; 1330 } else { 1331 /* copy the SOB id and value of the signal CS */ 1332 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; 1333 cs_cmpl->sob_val = signal_cs_cmpl->sob_val; 1334 } 1335 1336 /* check again if the signal cs already completed. 
1337 * if yes then don't send any wait cs since the hw_sob 1338 * could be in reset already. if signal is not completed 1339 * then get refcount to hw_sob to prevent resetting the sob 1340 * while wait cs is not submitted. 1341 * note that this check is protected by two locks, 1342 * hw queue lock and completion object lock, 1343 * and the same completion object lock also protects 1344 * the hw_sob reset handler function. 1345 * The hw_queue lock prevent out of sync of hw_sob 1346 * refcount value, changed by signal/wait flows. 1347 */ 1348 spin_lock(&signal_cs_cmpl->lock); 1349 1350 if (completion_done(&cs->signal_fence->completion)) { 1351 spin_unlock(&signal_cs_cmpl->lock); 1352 return -EINVAL; 1353 } 1354 /* Increment kref since all slave queues are now waiting on it */ 1355 kref_get(&cs_cmpl->hw_sob->kref); 1356 1357 spin_unlock(&signal_cs_cmpl->lock); 1358 1359 /* Calculate the stream from collective master queue (1st job) */ 1360 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node); 1361 stream = job->hw_queue_id % 4; 1362 sob_group_offset = 1363 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream]; 1364 1365 list_for_each_entry(job, &cs->job_list, cs_node) { 1366 queue_id = job->hw_queue_id; 1367 1368 if (hdev->kernel_queues[queue_id].collective_mode == 1369 HL_COLLECTIVE_MASTER) 1370 gaudi_collective_master_init_job(hdev, job, stream, 1371 sob_group_offset); 1372 else 1373 gaudi_collective_slave_init_job(hdev, job, cs_cmpl); 1374 } 1375 1376 cs_cmpl->sob_group = sob_group_offset; 1377 1378 /* Handle sob group kref and wraparound */ 1379 kref_get(&cprop->hw_sob_group[sob_group_offset].kref); 1380 cprop->next_sob_group_val[stream]++; 1381 1382 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) { 1383 /* 1384 * Decrement as we reached the max value. 1385 * The release function won't be called here as we've 1386 * just incremented the refcount. 
1387 */ 1388 kref_put(&cprop->hw_sob_group[sob_group_offset].kref, 1389 gaudi_sob_group_reset_error); 1390 cprop->next_sob_group_val[stream] = 1; 1391 /* only two SOBs are currently in use */ 1392 cprop->curr_sob_group_idx[stream] = 1393 (cprop->curr_sob_group_idx[stream] + 1) & 1394 (HL_RSVD_SOBS - 1); 1395 1396 gaudi_collective_map_sobs(hdev, stream); 1397 1398 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n", 1399 cprop->curr_sob_group_idx[stream], stream); 1400 } 1401 1402 mb(); 1403 hl_fence_put(cs->signal_fence); 1404 cs->signal_fence = NULL; 1405 1406 return 0; 1407 } 1408 1409 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size) 1410 { 1411 u32 cacheline_end, additional_commands; 1412 1413 cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE); 1414 additional_commands = sizeof(struct packet_msg_prot) * 2; 1415 1416 if (user_cb_size + additional_commands > cacheline_end) 1417 return cacheline_end - user_cb_size + additional_commands; 1418 else 1419 return additional_commands; 1420 } 1421 1422 static int gaudi_collective_wait_create_job(struct hl_device *hdev, 1423 struct hl_ctx *ctx, struct hl_cs *cs, 1424 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id, 1425 u32 encaps_signal_offset) 1426 { 1427 struct hw_queue_properties *hw_queue_prop; 1428 struct hl_cs_counters_atomic *cntr; 1429 struct hl_cs_job *job; 1430 struct hl_cb *cb; 1431 u32 cb_size; 1432 bool patched_cb; 1433 1434 cntr = &hdev->aggregated_cs_counters; 1435 1436 if (mode == HL_COLLECTIVE_MASTER) { 1437 /* CB size of collective master queue contains 1438 * 4 msg short packets for monitor 1 configuration 1439 * 1 fence packet 1440 * 4 msg short packets for monitor 2 configuration 1441 * 1 fence packet 1442 * 2 msg prot packets for completion and MSI 1443 */ 1444 cb_size = sizeof(struct packet_msg_short) * 8 + 1445 sizeof(struct packet_fence) * 2 + 1446 sizeof(struct packet_msg_prot) * 2; 1447 patched_cb = true; 1448 } else { 1449 /* CB size of collective 
slave queues contains 1450 * 4 msg short packets for monitor configuration 1451 * 1 fence packet 1452 * 1 additional msg short packet for sob signal 1453 */ 1454 cb_size = sizeof(struct packet_msg_short) * 5 + 1455 sizeof(struct packet_fence); 1456 patched_cb = false; 1457 } 1458 1459 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id]; 1460 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true); 1461 if (!job) { 1462 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1463 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1464 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1465 return -ENOMEM; 1466 } 1467 1468 /* Allocate internal mapped CB for non patched CBs */ 1469 cb = hl_cb_kernel_create(hdev, cb_size, 1470 hdev->mmu_enable && !patched_cb); 1471 if (!cb) { 1472 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1473 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1474 kfree(job); 1475 return -EFAULT; 1476 } 1477 1478 job->id = 0; 1479 job->cs = cs; 1480 job->user_cb = cb; 1481 atomic_inc(&job->user_cb->cs_cnt); 1482 job->user_cb_size = cb_size; 1483 job->hw_queue_id = queue_id; 1484 1485 /* since its guaranteed to have only one chunk in the collective wait 1486 * cs, we can use this chunk to set the encapsulated signal offset 1487 * in the jobs. 1488 */ 1489 if (cs->encaps_signals) 1490 job->encaps_sig_wait_offset = encaps_signal_offset; 1491 1492 /* 1493 * No need in parsing, user CB is the patched CB. 1494 * We call hl_cb_destroy() out of two reasons - we don't need 1495 * the CB in the CB idr anymore and to decrement its refcount as 1496 * it was incremented inside hl_cb_kernel_create(). 
1497 */ 1498 if (patched_cb) 1499 job->patched_cb = job->user_cb; 1500 else 1501 job->patched_cb = NULL; 1502 1503 job->job_cb_size = job->user_cb_size; 1504 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 1505 1506 /* increment refcount as for external queues we get completion */ 1507 if (hw_queue_prop->type == QUEUE_TYPE_EXT) 1508 cs_get(cs); 1509 1510 cs->jobs_in_queue_cnt[job->hw_queue_id]++; 1511 1512 list_add_tail(&job->cs_node, &cs->job_list); 1513 1514 hl_debugfs_add_job(hdev, job); 1515 1516 return 0; 1517 } 1518 1519 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev, 1520 struct hl_ctx *ctx, struct hl_cs *cs, 1521 u32 wait_queue_id, u32 collective_engine_id, 1522 u32 encaps_signal_offset) 1523 { 1524 struct gaudi_device *gaudi = hdev->asic_specific; 1525 struct hw_queue_properties *hw_queue_prop; 1526 u32 queue_id, collective_queue, num_jobs; 1527 u32 stream, nic_queue, nic_idx = 0; 1528 bool skip; 1529 int i, rc = 0; 1530 1531 /* Verify wait queue id is configured as master */ 1532 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id]; 1533 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { 1534 dev_err(hdev->dev, 1535 "Queue %d is not configured as collective master\n", 1536 wait_queue_id); 1537 return -EINVAL; 1538 } 1539 1540 /* Verify engine id is supported */ 1541 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 && 1542 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) { 1543 dev_err(hdev->dev, 1544 "Collective wait does not support engine %u\n", 1545 collective_engine_id); 1546 return -EINVAL; 1547 } 1548 1549 stream = wait_queue_id % 4; 1550 1551 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5) 1552 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream; 1553 else 1554 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream; 1555 1556 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1; 1557 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream; 1558 1559 /* First job goes to the collective master queue, it will wait for 1560 * the 
collective slave queues to finish execution. 1561 * The synchronization is done using two monitors: 1562 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the 1563 * reduction engine (DMA5/TPC7). 1564 * 1565 * Rest of the jobs goes to the collective slave queues which will 1566 * all wait for the user to signal sob 'cs_cmpl->sob_val'. 1567 */ 1568 for (i = 0 ; i < num_jobs ; i++) { 1569 if (i == 0) { 1570 queue_id = wait_queue_id; 1571 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1572 HL_COLLECTIVE_MASTER, queue_id, 1573 wait_queue_id, encaps_signal_offset); 1574 } else { 1575 if (nic_idx < NIC_NUMBER_OF_ENGINES) { 1576 if (gaudi->hw_cap_initialized & 1577 BIT(HW_CAP_NIC_SHIFT + nic_idx)) 1578 skip = false; 1579 else 1580 skip = true; 1581 1582 queue_id = nic_queue; 1583 nic_queue += 4; 1584 nic_idx++; 1585 1586 if (skip) 1587 continue; 1588 } else { 1589 queue_id = collective_queue; 1590 } 1591 1592 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1593 HL_COLLECTIVE_SLAVE, queue_id, 1594 wait_queue_id, encaps_signal_offset); 1595 } 1596 1597 if (rc) 1598 return rc; 1599 } 1600 1601 return rc; 1602 } 1603 1604 static int gaudi_late_init(struct hl_device *hdev) 1605 { 1606 struct gaudi_device *gaudi = hdev->asic_specific; 1607 int rc; 1608 1609 rc = gaudi->cpucp_info_get(hdev); 1610 if (rc) { 1611 dev_err(hdev->dev, "Failed to get cpucp info\n"); 1612 return rc; 1613 } 1614 1615 if ((hdev->card_type == cpucp_card_type_pci) && 1616 (hdev->nic_ports_mask & 0x3)) { 1617 dev_info(hdev->dev, 1618 "PCI card detected, only 8 ports are enabled\n"); 1619 hdev->nic_ports_mask &= ~0x3; 1620 1621 /* Stop and disable unused NIC QMANs */ 1622 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1623 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1624 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1625 1626 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1627 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1628 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1629 1630 
WREG32(mmNIC0_QM0_GLBL_CFG0, 0); 1631 WREG32(mmNIC0_QM1_GLBL_CFG0, 0); 1632 1633 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1); 1634 } 1635 1636 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0); 1637 if (rc) { 1638 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 1639 return rc; 1640 } 1641 1642 /* Scrub both SRAM and DRAM */ 1643 rc = hdev->asic_funcs->scrub_device_mem(hdev); 1644 if (rc) 1645 goto disable_pci_access; 1646 1647 rc = gaudi_fetch_psoc_frequency(hdev); 1648 if (rc) { 1649 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 1650 goto disable_pci_access; 1651 } 1652 1653 rc = gaudi_mmu_clear_pgt_range(hdev); 1654 if (rc) { 1655 dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); 1656 goto disable_pci_access; 1657 } 1658 1659 rc = gaudi_init_tpc_mem(hdev); 1660 if (rc) { 1661 dev_err(hdev->dev, "Failed to initialize TPC memories\n"); 1662 goto disable_pci_access; 1663 } 1664 1665 rc = gaudi_collective_init(hdev); 1666 if (rc) { 1667 dev_err(hdev->dev, "Failed to init collective\n"); 1668 goto disable_pci_access; 1669 } 1670 1671 /* We only support a single ASID for the user, so for the sake of optimization, just 1672 * initialize the ASID one time during device initialization with the fixed value of 1 1673 */ 1674 gaudi_mmu_prepare(hdev, 1); 1675 1676 hl_fw_set_pll_profile(hdev); 1677 1678 return 0; 1679 1680 disable_pci_access: 1681 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 1682 1683 return rc; 1684 } 1685 1686 static void gaudi_late_fini(struct hl_device *hdev) 1687 { 1688 hl_hwmon_release_resources(hdev); 1689 } 1690 1691 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 1692 { 1693 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 1694 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}; 1695 int i, j, rc = 0; 1696 1697 /* 1698 * The device CPU works with 40-bits addresses, while bit 39 must be set 1699 * to '1' 
when accessing the host. 1700 * Bits 49:39 of the full host address are saved for a later 1701 * configuration of the HW to perform extension to 50 bits. 1702 * Because there is a single HW register that holds the extension bits, 1703 * these bits must be identical in all allocated range. 1704 */ 1705 1706 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 1707 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 1708 &dma_addr_arr[i], 1709 GFP_KERNEL | __GFP_ZERO); 1710 if (!virt_addr_arr[i]) { 1711 rc = -ENOMEM; 1712 goto free_dma_mem_arr; 1713 } 1714 1715 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 1716 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == 1717 GAUDI_CPU_PCI_MSB_ADDR(end_addr)) 1718 break; 1719 } 1720 1721 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { 1722 dev_err(hdev->dev, 1723 "MSB of CPU accessible DMA memory are not identical in all range\n"); 1724 rc = -EFAULT; 1725 goto free_dma_mem_arr; 1726 } 1727 1728 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 1729 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 1730 hdev->cpu_pci_msb_addr = 1731 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); 1732 1733 if (!hdev->asic_prop.fw_security_enabled) 1734 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); 1735 1736 free_dma_mem_arr: 1737 for (j = 0 ; j < i ; j++) 1738 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 1739 dma_addr_arr[j]); 1740 1741 return rc; 1742 } 1743 1744 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev) 1745 { 1746 struct gaudi_device *gaudi = hdev->asic_specific; 1747 struct gaudi_internal_qman_info *q; 1748 u32 i; 1749 1750 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1751 q = &gaudi->internal_qmans[i]; 1752 if (!q->pq_kernel_addr) 1753 continue; 1754 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr); 1755 } 1756 } 1757 1758 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev) 
1759 { 1760 struct gaudi_device *gaudi = hdev->asic_specific; 1761 struct gaudi_internal_qman_info *q; 1762 int rc, i; 1763 1764 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1765 if (gaudi_queue_type[i] != QUEUE_TYPE_INT) 1766 continue; 1767 1768 q = &gaudi->internal_qmans[i]; 1769 1770 switch (i) { 1771 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3: 1772 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES; 1773 break; 1774 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3: 1775 q->pq_size = MME_QMAN_SIZE_IN_BYTES; 1776 break; 1777 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3: 1778 q->pq_size = TPC_QMAN_SIZE_IN_BYTES; 1779 break; 1780 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3: 1781 q->pq_size = NIC_QMAN_SIZE_IN_BYTES; 1782 break; 1783 default: 1784 dev_err(hdev->dev, "Bad internal queue index %d", i); 1785 rc = -EINVAL; 1786 goto free_internal_qmans_pq_mem; 1787 } 1788 1789 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr, 1790 GFP_KERNEL | __GFP_ZERO); 1791 if (!q->pq_kernel_addr) { 1792 rc = -ENOMEM; 1793 goto free_internal_qmans_pq_mem; 1794 } 1795 } 1796 1797 return 0; 1798 1799 free_internal_qmans_pq_mem: 1800 gaudi_free_internal_qmans_pq_mem(hdev); 1801 return rc; 1802 } 1803 1804 static void gaudi_set_pci_memory_regions(struct hl_device *hdev) 1805 { 1806 struct asic_fixed_properties *prop = &hdev->asic_prop; 1807 struct pci_mem_region *region; 1808 1809 /* CFG */ 1810 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 1811 region->region_base = CFG_BASE; 1812 region->region_size = CFG_SIZE; 1813 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR; 1814 region->bar_size = CFG_BAR_SIZE; 1815 region->bar_id = CFG_BAR_ID; 1816 region->used = 1; 1817 1818 /* SRAM */ 1819 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 1820 region->region_base = SRAM_BASE_ADDR; 1821 region->region_size = SRAM_SIZE; 1822 region->offset_in_bar = 0; 1823 region->bar_size = SRAM_BAR_SIZE; 1824 region->bar_id = SRAM_BAR_ID; 
1825 region->used = 1; 1826 1827 /* DRAM */ 1828 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 1829 region->region_base = DRAM_PHYS_BASE; 1830 region->region_size = hdev->asic_prop.dram_size; 1831 region->offset_in_bar = 0; 1832 region->bar_size = prop->dram_pci_bar_size; 1833 region->bar_id = HBM_BAR_ID; 1834 region->used = 1; 1835 1836 /* SP SRAM */ 1837 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM]; 1838 region->region_base = PSOC_SCRATCHPAD_ADDR; 1839 region->region_size = PSOC_SCRATCHPAD_SIZE; 1840 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR; 1841 region->bar_size = CFG_BAR_SIZE; 1842 region->bar_id = CFG_BAR_ID; 1843 region->used = 1; 1844 } 1845 1846 static int gaudi_sw_init(struct hl_device *hdev) 1847 { 1848 struct gaudi_device *gaudi; 1849 u32 i, event_id = 0; 1850 int rc; 1851 1852 /* Allocate device structure */ 1853 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 1854 if (!gaudi) 1855 return -ENOMEM; 1856 1857 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 1858 if (gaudi_irq_map_table[i].valid) { 1859 if (event_id == GAUDI_EVENT_SIZE) { 1860 dev_err(hdev->dev, 1861 "Event array exceeds the limit of %u events\n", 1862 GAUDI_EVENT_SIZE); 1863 rc = -EINVAL; 1864 goto free_gaudi_device; 1865 } 1866 1867 gaudi->events[event_id++] = 1868 gaudi_irq_map_table[i].fc_id; 1869 } 1870 } 1871 1872 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1873 1874 hdev->asic_specific = gaudi; 1875 1876 /* Create DMA pool for small allocations */ 1877 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1878 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1879 if (!hdev->dma_pool) { 1880 dev_err(hdev->dev, "failed to create DMA pool\n"); 1881 rc = -ENOMEM; 1882 goto free_gaudi_device; 1883 } 1884 1885 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1886 if (rc) 1887 goto free_dma_pool; 1888 1889 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1890 if (!hdev->cpu_accessible_dma_pool) { 1891 dev_err(hdev->dev, 1892 "Failed 
to create CPU accessible DMA pool\n"); 1893 rc = -ENOMEM; 1894 goto free_cpu_dma_mem; 1895 } 1896 1897 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1898 (uintptr_t) hdev->cpu_accessible_dma_mem, 1899 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1900 if (rc) { 1901 dev_err(hdev->dev, 1902 "Failed to add memory to CPU accessible DMA pool\n"); 1903 rc = -EFAULT; 1904 goto free_cpu_accessible_dma_pool; 1905 } 1906 1907 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1908 if (rc) 1909 goto free_cpu_accessible_dma_pool; 1910 1911 spin_lock_init(&gaudi->hw_queues_lock); 1912 1913 hdev->supports_sync_stream = true; 1914 hdev->supports_coresight = true; 1915 hdev->supports_staged_submission = true; 1916 hdev->supports_wait_for_multi_cs = true; 1917 1918 hdev->asic_funcs->set_pci_memory_regions(hdev); 1919 hdev->stream_master_qid_arr = 1920 hdev->asic_funcs->get_stream_master_qid_arr(); 1921 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; 1922 1923 return 0; 1924 1925 free_cpu_accessible_dma_pool: 1926 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1927 free_cpu_dma_mem: 1928 if (!hdev->asic_prop.fw_security_enabled) 1929 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1930 hdev->cpu_pci_msb_addr); 1931 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1932 hdev->cpu_accessible_dma_address); 1933 free_dma_pool: 1934 dma_pool_destroy(hdev->dma_pool); 1935 free_gaudi_device: 1936 kfree(gaudi); 1937 return rc; 1938 } 1939 1940 static int gaudi_sw_fini(struct hl_device *hdev) 1941 { 1942 struct gaudi_device *gaudi = hdev->asic_specific; 1943 1944 gaudi_free_internal_qmans_pq_mem(hdev); 1945 1946 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1947 1948 if (!hdev->asic_prop.fw_security_enabled) 1949 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1950 hdev->cpu_pci_msb_addr); 1951 1952 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1953 hdev->cpu_accessible_dma_address); 
1954 1955 dma_pool_destroy(hdev->dma_pool); 1956 1957 kfree(gaudi); 1958 1959 return 0; 1960 } 1961 1962 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1963 { 1964 struct hl_device *hdev = arg; 1965 int i; 1966 1967 if (hdev->disabled) 1968 return IRQ_HANDLED; 1969 1970 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1971 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1972 1973 hl_irq_handler_eq(irq, &hdev->event_queue); 1974 1975 return IRQ_HANDLED; 1976 } 1977 1978 /* 1979 * For backward compatibility, new MSI interrupts should be set after the 1980 * existing CPU and NIC interrupts. 1981 */ 1982 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1983 bool cpu_eq) 1984 { 1985 int msi_vec; 1986 1987 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1988 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1989 GAUDI_EVENT_QUEUE_MSI_IDX); 1990 1991 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr : 1992 (nr + NIC_NUMBER_OF_ENGINES + 1); 1993 1994 return pci_irq_vector(hdev->pdev, msi_vec); 1995 } 1996 1997 static int gaudi_enable_msi_single(struct hl_device *hdev) 1998 { 1999 int rc, irq; 2000 2001 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n"); 2002 2003 irq = gaudi_pci_irq_vector(hdev, 0, false); 2004 rc = request_irq(irq, gaudi_irq_handler_single, 0, 2005 "gaudi single msi", hdev); 2006 if (rc) 2007 dev_err(hdev->dev, 2008 "Failed to request single MSI IRQ\n"); 2009 2010 return rc; 2011 } 2012 2013 static int gaudi_enable_msi_multi(struct hl_device *hdev) 2014 { 2015 int cq_cnt = hdev->asic_prop.completion_queues_count; 2016 int rc, i, irq_cnt_init, irq; 2017 2018 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) { 2019 irq = gaudi_pci_irq_vector(hdev, i, false); 2020 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i], 2021 &hdev->completion_queue[i]); 2022 if (rc) { 2023 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 2024 goto free_irqs; 2025 } 2026 } 
2027 2028 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true); 2029 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt], 2030 &hdev->event_queue); 2031 if (rc) { 2032 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 2033 goto free_irqs; 2034 } 2035 2036 return 0; 2037 2038 free_irqs: 2039 for (i = 0 ; i < irq_cnt_init ; i++) 2040 free_irq(gaudi_pci_irq_vector(hdev, i, false), 2041 &hdev->completion_queue[i]); 2042 return rc; 2043 } 2044 2045 static int gaudi_enable_msi(struct hl_device *hdev) 2046 { 2047 struct gaudi_device *gaudi = hdev->asic_specific; 2048 int rc; 2049 2050 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 2051 return 0; 2052 2053 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); 2054 if (rc < 0) { 2055 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 2056 return rc; 2057 } 2058 2059 if (rc < NUMBER_OF_INTERRUPTS) { 2060 gaudi->multi_msi_mode = false; 2061 rc = gaudi_enable_msi_single(hdev); 2062 } else { 2063 gaudi->multi_msi_mode = true; 2064 rc = gaudi_enable_msi_multi(hdev); 2065 } 2066 2067 if (rc) 2068 goto free_pci_irq_vectors; 2069 2070 gaudi->hw_cap_initialized |= HW_CAP_MSI; 2071 2072 return 0; 2073 2074 free_pci_irq_vectors: 2075 pci_free_irq_vectors(hdev->pdev); 2076 return rc; 2077 } 2078 2079 static void gaudi_sync_irqs(struct hl_device *hdev) 2080 { 2081 struct gaudi_device *gaudi = hdev->asic_specific; 2082 int i, cq_cnt = hdev->asic_prop.completion_queues_count; 2083 2084 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2085 return; 2086 2087 /* Wait for all pending IRQs to be finished */ 2088 if (gaudi->multi_msi_mode) { 2089 for (i = 0 ; i < cq_cnt ; i++) 2090 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false)); 2091 2092 synchronize_irq(gaudi_pci_irq_vector(hdev, 2093 GAUDI_EVENT_QUEUE_MSI_IDX, 2094 true)); 2095 } else { 2096 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 2097 } 2098 } 2099 2100 static void gaudi_disable_msi(struct hl_device *hdev) 2101 { 2102 struct 
gaudi_device *gaudi = hdev->asic_specific; 2103 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count; 2104 2105 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2106 return; 2107 2108 gaudi_sync_irqs(hdev); 2109 2110 if (gaudi->multi_msi_mode) { 2111 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, 2112 true); 2113 free_irq(irq, &hdev->event_queue); 2114 2115 for (i = 0 ; i < cq_cnt ; i++) { 2116 irq = gaudi_pci_irq_vector(hdev, i, false); 2117 free_irq(irq, &hdev->completion_queue[i]); 2118 } 2119 } else { 2120 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 2121 } 2122 2123 pci_free_irq_vectors(hdev->pdev); 2124 2125 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 2126 } 2127 2128 static void gaudi_init_scrambler_sram(struct hl_device *hdev) 2129 { 2130 struct gaudi_device *gaudi = hdev->asic_specific; 2131 2132 if (hdev->asic_prop.fw_security_enabled) 2133 return; 2134 2135 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 2136 CPU_BOOT_DEV_STS0_SRAM_SCR_EN) 2137 return; 2138 2139 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 2140 return; 2141 2142 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2143 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2144 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2145 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2146 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2147 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2148 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2149 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2150 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2151 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2152 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2153 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2154 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2155 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2156 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2157 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2158 2159 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2160 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2161 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2162 1 << 
IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2163 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2164 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2165 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2166 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2167 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2168 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2169 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2170 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2171 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2172 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2173 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2174 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2175 2176 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 2177 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2178 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 2179 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2180 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 2181 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2182 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 2183 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2184 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 2185 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2186 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 2187 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2188 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 2189 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2190 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 2191 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2192 2193 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 2194 } 2195 2196 static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 2197 { 2198 struct gaudi_device *gaudi = hdev->asic_specific; 2199 2200 if (hdev->asic_prop.fw_security_enabled) 2201 return; 2202 2203 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2204 CPU_BOOT_DEV_STS0_DRAM_SCR_EN) 2205 return; 2206 2207 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 2208 return; 2209 2210 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 2211 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2212 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 2213 
1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2214 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 2215 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2216 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 2217 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2218 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 2219 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2220 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 2221 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2222 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 2223 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2224 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 2225 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2226 2227 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 2228 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2229 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 2230 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2231 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 2232 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2233 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 2234 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2235 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 2236 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2237 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 2238 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2239 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 2240 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2241 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 2242 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2243 2244 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 2245 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2246 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 2247 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2248 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 2249 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2250 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 2251 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2252 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 2253 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2254 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 2255 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2256 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 2257 1 << 
DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2258 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 2259 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2260 2261 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 2262 } 2263 2264 static void gaudi_init_e2e(struct hl_device *hdev) 2265 { 2266 if (hdev->asic_prop.fw_security_enabled) 2267 return; 2268 2269 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2270 CPU_BOOT_DEV_STS0_E2E_CRED_EN) 2271 return; 2272 2273 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 2274 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 2275 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 2276 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 2277 2278 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2279 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2280 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2281 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2282 2283 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2284 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2285 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2286 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2287 2288 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2289 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2290 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2291 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2292 2293 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2294 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2295 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2296 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2297 2298 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2299 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2300 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2301 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2302 2303 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2304 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2305 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2306 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2307 2308 
WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 2309 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 2310 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 2311 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 2312 2313 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 2314 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 2315 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 2316 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 2317 2318 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2319 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2320 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2321 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2322 2323 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2324 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2325 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2326 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2327 2328 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2329 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2330 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2331 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2332 2333 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2334 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2335 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2336 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2337 2338 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2339 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2340 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2341 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2342 2343 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2344 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2345 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2346 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2347 2348 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 2349 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 2350 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 2351 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 2352 2353 
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2354 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2355 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2356 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2357 2358 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2359 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2360 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2361 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2362 2363 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2364 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2365 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2366 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2367 2368 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2369 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2370 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2371 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2372 2373 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2374 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2375 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2376 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2377 2378 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2379 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2380 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2381 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2382 2383 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2384 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2385 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2386 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2387 2388 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2389 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2390 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2391 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2392 2393 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 2394 1 << 
IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2395 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 2396 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2397 2398 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 2399 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2400 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 2401 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2402 2403 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 2404 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2405 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 2406 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2407 2408 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 2409 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2410 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 2411 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2412 2413 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 2414 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2415 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 2416 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2417 2418 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 2419 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2420 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 2421 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2422 2423 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 2424 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2425 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 2426 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2427 2428 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 2429 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2430 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 2431 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2432 2433 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 2434 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2435 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 2436 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2437 2438 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 2439 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2440 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 2441 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2442 2443 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 2444 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2445 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 2446 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2447 2448 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 2449 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2450 
WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 2451 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2452 2453 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 2454 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2455 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 2456 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2457 2458 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 2459 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2460 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 2461 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2462 2463 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 2464 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2465 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 2466 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2467 2468 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 2469 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2470 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 2471 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2472 2473 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 2474 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2475 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 2476 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2477 2478 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 2479 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2480 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 2481 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2482 2483 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 2484 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2485 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 2486 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2487 2488 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 2489 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2490 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 2491 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2492 2493 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 2494 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2495 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 2496 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2497 2498 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 2499 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2500 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 2501 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2502 2503 
WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, 2504 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2505 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, 2506 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2507 2508 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, 2509 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2510 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, 2511 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2512 } 2513 2514 static void gaudi_init_hbm_cred(struct hl_device *hdev) 2515 { 2516 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; 2517 2518 if (hdev->asic_prop.fw_security_enabled) 2519 return; 2520 2521 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2522 CPU_BOOT_DEV_STS0_HBM_CRED_EN) 2523 return; 2524 2525 hbm0_wr = 0x33333333; 2526 hbm0_rd = 0x77777777; 2527 hbm1_wr = 0x55555555; 2528 hbm1_rd = 0xDDDDDDDD; 2529 2530 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); 2531 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); 2532 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); 2533 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); 2534 2535 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); 2536 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); 2537 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); 2538 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); 2539 2540 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); 2541 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); 2542 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); 2543 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); 2544 2545 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); 2546 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); 2547 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); 2548 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); 2549 2550 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, 2551 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2552 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2553 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, 2554 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2555 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2556 
WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, 2557 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2558 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2559 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, 2560 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2561 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2562 2563 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, 2564 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2565 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2566 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, 2567 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2568 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2569 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, 2570 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2571 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2572 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, 2573 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2574 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2575 } 2576 2577 static void gaudi_init_golden_registers(struct hl_device *hdev) 2578 { 2579 u32 tpc_offset; 2580 int tpc_id, i; 2581 2582 gaudi_init_e2e(hdev); 2583 gaudi_init_hbm_cred(hdev); 2584 2585 for (tpc_id = 0, tpc_offset = 0; 2586 tpc_id < TPC_NUMBER_OF_ENGINES; 2587 tpc_id++, tpc_offset += TPC_CFG_OFFSET) { 2588 /* Mask all arithmetic interrupts from TPC */ 2589 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); 2590 /* Set 16 cache lines */ 2591 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, 2592 ICACHE_FETCH_LINE_NUM, 2); 2593 } 2594 2595 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ 2596 for (i = 0 ; i < 128 ; i += 8) 2597 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); 2598 2599 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2600 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2601 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2602 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2603 } 2604 2605 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 2606 int qman_id, dma_addr_t qman_pq_addr) 2607 { 2608 struct cpu_dyn_regs *dyn_regs = 2609 
&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2610 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2611 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2612 u32 q_off, dma_qm_offset; 2613 u32 dma_qm_err_cfg, irq_handler_offset; 2614 2615 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2616 2617 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2618 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2619 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2620 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2621 so_base_en_lo = lower_32_bits(CFG_BASE + 2622 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2623 so_base_en_hi = upper_32_bits(CFG_BASE + 2624 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2625 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2626 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2627 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2628 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2629 so_base_ws_lo = lower_32_bits(CFG_BASE + 2630 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2631 so_base_ws_hi = upper_32_bits(CFG_BASE + 2632 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2633 2634 q_off = dma_qm_offset + qman_id * 4; 2635 2636 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 2637 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 2638 2639 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 2640 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2641 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2642 2643 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 2644 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2645 QMAN_LDMA_SRC_OFFSET); 2646 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2647 QMAN_LDMA_DST_OFFSET); 2648 2649 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2650 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2651 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2652 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, 
so_base_en_hi); 2653 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 2654 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 2655 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 2656 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 2657 2658 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 2659 2660 /* The following configuration is needed only once per QMAN */ 2661 if (qman_id == 0) { 2662 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2663 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2664 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2665 2666 /* Configure RAZWI IRQ */ 2667 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2668 if (hdev->stop_on_err) 2669 dma_qm_err_cfg |= 2670 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2671 2672 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2673 2674 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2675 lower_32_bits(CFG_BASE + irq_handler_offset)); 2676 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2677 upper_32_bits(CFG_BASE + irq_handler_offset)); 2678 2679 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2680 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2681 dma_id); 2682 2683 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2684 QM_ARB_ERR_MSG_EN_MASK); 2685 2686 /* Set timeout to maximum */ 2687 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2688 2689 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2690 QMAN_EXTERNAL_MAKE_TRUSTED); 2691 2692 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2693 } 2694 } 2695 2696 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) 2697 { 2698 struct cpu_dyn_regs *dyn_regs = 2699 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2700 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; 2701 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 2702 u32 irq_handler_offset; 2703 2704 /* Set to maximum possible according to physical size */ 
2705 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); 2706 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); 2707 2708 /* WA for H/W bug H3-2116 */ 2709 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15); 2710 2711 /* STOP_ON bit implies no completion to operation in case of RAZWI */ 2712 if (hdev->stop_on_err) 2713 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; 2714 2715 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); 2716 2717 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2718 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2719 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 2720 2721 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, 2722 lower_32_bits(CFG_BASE + irq_handler_offset)); 2723 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, 2724 upper_32_bits(CFG_BASE + irq_handler_offset)); 2725 2726 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, 2727 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); 2728 WREG32(mmDMA0_CORE_PROT + dma_offset, 2729 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); 2730 /* If the channel is secured, it should be in MMU bypass mode */ 2731 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, 2732 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); 2733 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); 2734 } 2735 2736 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, 2737 u32 enable_mask) 2738 { 2739 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2740 2741 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); 2742 } 2743 2744 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) 2745 { 2746 struct gaudi_device *gaudi = hdev->asic_specific; 2747 struct hl_hw_queue *q; 2748 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; 2749 2750 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) 2751 return; 2752 2753 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2754 dma_id = gaudi_dma_assignment[i]; 2755 /* 2756 * For queues after the CPU Q need to add 1 to get the correct 2757 
* queue. In addition, need to add the CPU EQ and NIC IRQs in 2758 * order to get the correct MSI register. 2759 */ 2760 if (dma_id > 1) { 2761 cpu_skip = 1; 2762 nic_skip = NIC_NUMBER_OF_ENGINES; 2763 } else { 2764 cpu_skip = 0; 2765 nic_skip = 0; 2766 } 2767 2768 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2769 q_idx = 4 * dma_id + j + cpu_skip; 2770 q = &hdev->kernel_queues[q_idx]; 2771 q->cq_id = cq_id++; 2772 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 2773 gaudi_init_pci_dma_qman(hdev, dma_id, j, 2774 q->bus_address); 2775 } 2776 2777 gaudi_init_dma_core(hdev, dma_id); 2778 2779 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 2780 } 2781 2782 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2783 } 2784 2785 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2786 int qman_id, u64 qman_base_addr) 2787 { 2788 struct cpu_dyn_regs *dyn_regs = 2789 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2790 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2791 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2792 u32 dma_qm_err_cfg, irq_handler_offset; 2793 u32 q_off, dma_qm_offset; 2794 2795 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2796 2797 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2798 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2799 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2800 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2801 so_base_en_lo = lower_32_bits(CFG_BASE + 2802 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2803 so_base_en_hi = upper_32_bits(CFG_BASE + 2804 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2805 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2806 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2807 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2808 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2809 so_base_ws_lo = lower_32_bits(CFG_BASE + 2810 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2811 so_base_ws_hi = upper_32_bits(CFG_BASE + 2812 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2813 
/* Per-stream registers are 4 bytes apart inside the QMAN block */
	q_off = dma_qm_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_DST_OFFSET);
	} else {
		/*
		 * Error messages go either to the GIC SETSPI register or to
		 * the address published by the firmware in dyn_regs.
		 */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Event id written on error: DMA0 QM event id plus dma_id */
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
	/*
	 * NOTE(review): the caller in this file already passes a dma_id that
	 * was read from gaudi_dma_assignment[]; indexing the table with it a
	 * second time here is only equivalent to comparing dma_id itself if
	 * the table maps that index to the same engine - confirm.
	 */
	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}

/*
 * gaudi_init_hbm_dma_qmans() - one-time setup of the HBM DMA QMANs.
 * @hdev: habanalabs device structure.
 *
 * Returns immediately if HW_CAP_HBM_DMA is already set. Otherwise, for each
 * of the HBM_DMA_NUMBER_OF_CHNLS channels (taken from gaudi_dma_assignment[]
 * starting at GAUDI_HBM_DMA_1), it programs the QMAN_STREAMS streams from the
 * matching internal_qmans[] entries, programs the lower CP (qman_id 4),
 * initializes the DMA core and enables the QMAN. HW_CAP_HBM_DMA is set at
 * the end.
 */
static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	int i, j, dma_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
		return;

	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			/*
			 * Add the CPU queue in order to get the correct queue
			 * number as all internal queue are placed after it
			 */
			internal_q_index = dma_id * QMAN_STREAMS + j + 1;

			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
						qman_base_addr);
		}

		/* Initializing lower CP for HBM DMA QMAN */
		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}

/*
 * gaudi_init_mme_qman() - program one part of an MME QMAN.
 * @hdev: habanalabs device structure.
 * @mme_offset: register offset of the target MME QMAN relative to MME0.
 * @qman_id: 0-3 selects a stream (PQ registers are programmed); any other
 *           value takes the branch that programs the QMAN-global error,
 *           arbitration and protection registers (callers in this file
 *           pass 4 for the lower CP).
 * @qman_base_addr: PQ base address; only used when qman_id < 4.
 *
 * In both cases the CP message base 0/1 registers are pointed at the E_N
 * sync manager monitor-payload and sync-object blocks.
 */
static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
					int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 irq_handler_offset;
	u32 q_off, mme_id;
	u32 mme_qm_err_cfg;

	mtr_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream registers are 4 bytes apart inside the QMAN block */
	q_off = mme_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_DST_OFFSET);
	} else {
		/*
		 * Error messages go either to the GIC SETSPI register or to
		 * the address published by the firmware in dyn_regs.
		 */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		/*
		 * Offset in units of the MME0->MME1 register stride, halved.
		 * The callers in this file pass 0 or the MME0->MME2 distance;
		 * presumably that yields 0 and 1 - confirm against the map.
		 */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;

		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			mme_qm_err_cfg |=
				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);

		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
									mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
}

/*
 * gaudi_init_mme_qmans() - one-time setup of the two MME QMANs.
 * @hdev: habanalabs device structure.
 *
 * Returns immediately if HW_CAP_MME is already set. The first four internal
 * queues starting at GAUDI_QUEUE_ID_MME_0_0 are programmed into the MME2
 * QMAN and the following ones into the MME0 QMAN (the offset is reset to 0
 * after i == 3). The lower CP of both QMANs is then programmed, both QMANs
 * are enabled and HW_CAP_MME is set.
 */
static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 mme_offset;
	int i, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_MME)
		return;

	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
	 */

	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;

	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
		q = &gaudi->internal_qmans[internal_q_index];
		qman_base_addr = (u64) q->pq_dma_addr;
		/* (i & 0x3) folds the running index into a stream id 0-3 */
		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
					qman_base_addr);
		/* After the fourth queue, switch from MME2 to MME0 */
		if (i == 3)
			mme_offset = 0;
	}

	/* Initializing lower CP for MME QMANs */
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
	gaudi_init_mme_qman(hdev, 0, 4, 0);

	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);

	gaudi->hw_cap_initialized |= HW_CAP_MME;
}

/*
 * gaudi_init_tpc_qman() - program one part of a TPC QMAN.
 * @hdev: habanalabs device structure.
 * @tpc_offset: register offset of the target TPC QMAN relative to TPC0; the
 *              TPC index is derived from it by dividing by the TPC0->TPC1
 *              register stride.
 * @qman_id: 0-3 selects a stream (PQ registers are programmed); any other
 *           value takes the branch that programs the QMAN-global error,
 *           arbitration and protection registers (the caller in this file
 *           passes 4 for the lower CP).
 * @qman_base_addr: PQ base address; only used when qman_id < 4.
 *
 * CP message base 0/1 always point at the E_N sync manager blocks. For the
 * TPC whose derived index is 6, message base 2/3 are additionally pointed at
 * the W_S sync manager blocks.
 */
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 tpc_qm_err_cfg, irq_handler_offset;
	u32 q_off, tpc_id;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream registers are 4 bytes apart inside the QMAN block */
	q_off = tpc_offset + qman_id * 4;

	tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);

	if (qman_id < 4) {
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_DST_OFFSET);
	} else {
		/*
		 * Error messages go either to the GIC SETSPI register or to
		 * the address published by the firmware in dyn_regs.
		 */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			tpc_qm_err_cfg |=
				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);

		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
									tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
	/*
	 * NOTE(review): the comment above names TPC7 while the check is for
	 * the zero-based index 6 - confirm which engine is intended.
	 */
	if (tpc_id == 6) {
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}

/*
 * gaudi_init_tpc_qmans() - one-time setup of all TPC QMANs.
 * @hdev: habanalabs device structure.
 *
 * Returns immediately if any bit of HW_CAP_TPC_MASK is already set. For each
 * of the TPC_NUMBER_OF_ENGINES engines it programs the QMAN_STREAMS streams,
 * then (after the last stream) the lower CP and the QMAN enable, writes the
 * upper half of the sync-object base to the TPC's SM_BASE_ADDRESS_HIGH
 * register and records the engine in hw_cap_initialized.
 */
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 so_base_hi, tpc_offset = 0;
	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
	int i, tpc_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
		return;

	so_base_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
						tpc_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_tpc_qman(hdev, tpc_offset, i,
						qman_base_addr);

			if (i == 3) {
				/* Initializing lower CP for TPC QMAN */
				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
/* Enable the QMAN and TPC channel */
				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
						QMAN_TPC_ENABLE);
			}
		}

		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
				so_base_hi);

		/* Advance to the next TPC's QMAN register block */
		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;

		gaudi->hw_cap_initialized |=
				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
	}
}

/*
 * gaudi_init_nic_qman() - program one stream of a NIC QMAN.
 * @hdev: habanalabs device structure.
 * @nic_offset: register offset of the target NIC QMAN relative to NIC0 QM0.
 * @qman_id: stream index; per-stream registers are addressed at
 *           nic_offset + qman_id * 4. When it is 0 the QMAN-global error,
 *           arbitration and protection registers are programmed as well.
 * @qman_base_addr: PQ base address of the stream.
 * @nic_id: NIC engine index, added to the NIC0 QM0 event id that is written
 *          to the error WDATA register.
 *
 * CP message base 0/1 are pointed at the E_N sync manager blocks and base
 * 2/3 at the W_S sync manager blocks.
 */
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 nic_qm_err_cfg, irq_handler_offset;
	u32 q_off;

	/* The low halves are computed from CFG_BASE masked to 32 bits */
	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	/* QMAN-global registers are programmed once, with the first stream */
	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);

		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
									nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}

/*
 * gaudi_init_nic_qmans() - one-time setup of the NIC QMANs.
 * @hdev: habanalabs device structure.
 *
 * Does nothing if hdev->nic_ports_mask is zero or if any bit of
 * HW_CAP_NIC_MASK is already set. Otherwise it walks the
 * NIC_NUMBER_OF_ENGINES engines; engines whose bit is clear in
 * nic_ports_mask are skipped (only the register offset is advanced). For the
 * others it programs the QMAN_STREAMS streams, enables the QMAN and sets the
 * engine's bit in hw_cap_initialized.
 */
static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			/*
			 * Port not in use: keep nic_offset in step. Even ids
			 * move from QM0 to QM1; odd ids move back to QM0 and
			 * on to the next NIC block.
			 */
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}
			continue;
		}

		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		/* Same offset stepping as for skipped ports above */
		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}

/*
 * gaudi_disable_pci_dma_qmans() - clear GLBL_CFG0 of the DMA0, DMA1 and DMA5
 * QMANs. No-op unless HW_CAP_PCI_DMA is set.
 */
static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
}

/*
 * gaudi_disable_hbm_dma_qmans() - clear GLBL_CFG0 of the DMA2, DMA3, DMA4,
 * DMA6 and DMA7 QMANs. No-op unless HW_CAP_HBM_DMA is set.
 */
static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
}

/*
 * gaudi_disable_mme_qmans() - clear GLBL_CFG0 of the MME2 and MME0 QMANs.
 * No-op unless HW_CAP_MME is set.
 */
static void gaudi_disable_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME2_QM_GLBL_CFG0, 0);
	WREG32(mmMME0_QM_GLBL_CFG0, 0);
}

/*
 * gaudi_disable_tpc_qmans() - clear GLBL_CFG0 of every TPC QMAN.
 * No-op unless at least one bit of HW_CAP_TPC_MASK is set; when it runs, all
 * TPC_NUMBER_OF_ENGINES QMANs are written regardless of their own bit.
 */
static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = 0;
	int tpc_id;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
	}
}

/*
 * gaudi_disable_nic_qmans() - clear GLBL_CFG0 of every NIC QMAN whose
 * capability bit (HW_CAP_NIC_SHIFT + nic_id) is set in hw_cap_initialized.
 * The register offset is advanced for every engine, initialized or not.
 */
static void gaudi_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_mask, nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int nic_id;

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);

		if (gaudi->hw_cap_initialized & nic_mask)
			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);

		/* Even ids: QM0 -> QM1; odd ids: back to QM0 of next NIC */
		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}
	}
}

/*
 * gaudi_stop_pci_dma_qmans() - set the four low CP_STOP bits (0xF) in
 * GLBL_CFG1 of the DMA0, DMA1 and DMA5 QMANs. No-op unless HW_CAP_PCI_DMA
 * is set.
 */
static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

/*
 * gaudi_stop_hbm_dma_qmans() - set five CP_STOP bits (0x1F) in GLBL_CFG1 of
 * the DMA2, DMA3, DMA4, DMA6 and DMA7 QMANs. No-op unless HW_CAP_HBM_DMA is
 * set.
 */
static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	/* Stop CPs of HBM DMA QMANs */

	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

/*
 * gaudi_stop_mme_qmans() - set five CP_STOP bits (0x1F) in GLBL_CFG1 of the
 * MME2 and MME0 QMANs. No-op unless HW_CAP_MME is set.
 */
static void gaudi_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* Stop CPs of MME QMANs */
	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

/*
 * gaudi_stop_tpc_qmans() - set five CP_STOP bits (0x1F) in GLBL_CFG1 of the
 * TPC0..TPC7 QMANs. No-op unless at least one bit of HW_CAP_TPC_MASK is set.
 */
static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

/*
 * gaudi_stop_nic_qmans() - for every NIC port whose HW_CAP_NICn bit is set,
 * write the PQF, CQF and CP stop masks to GLBL_CFG1 of its QMAN. Port n maps
 * to QMAN (n % 2) of NIC block (n / 2), as seen in the register names below.
 */
static void gaudi_stop_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Stop upper CPs of QMANs */

	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
		WREG32(mmNIC0_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
		WREG32(mmNIC0_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
		WREG32(mmNIC1_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
		WREG32(mmNIC1_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
		WREG32(mmNIC2_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
		WREG32(mmNIC2_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
		WREG32(mmNIC3_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
		WREG32(mmNIC3_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
		WREG32(mmNIC4_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
		WREG32(mmNIC4_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
}

/*
 * gaudi_pci_dma_stall() - set the HALT bit in CORE_CFG_1 of the DMA0, DMA1
 * and DMA5 cores. No-op unless HW_CAP_PCI_DMA is set.
 */
static void gaudi_pci_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}

/*
 * gaudi_hbm_dma_stall() - set the HALT bit in CORE_CFG_1 of the DMA2, DMA3,
 * DMA4, DMA6 and DMA7 cores. No-op unless HW_CAP_HBM_DMA is set.
 */
static void gaudi_hbm_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}

/*
 * gaudi_mme_stall() - write the ACC and SBAB stall bits of MME0..MME3.
 * No-op unless HW_CAP_MME is set. Every register is deliberately written
 * twice (see the workaround note below); do not collapse the duplicates.
 */
static void gaudi_mme_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
}

/*
 * gaudi_tpc_stall() - set the stall bit in TPC_STALL of TPC0..TPC7.
 * No-op unless at least one bit of HW_CAP_TPC_MASK is set.
 */
static void gaudi_tpc_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
}

/*
 * gaudi_disable_clock_gating() - zero the CGM_CFG and CGM_CFG1 registers of
 * all DMA QMANs, the MME0 and MME2 QMANs and all TPC QMANs.
 * Skipped entirely when hdev->asic_prop.fw_security_enabled is set.
 */
static void gaudi_disable_clock_gating(struct hl_device *hdev)
{
	u32 qman_offset;
	int i;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);

		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
	}

	WREG32(mmMME0_QM_CGM_CFG, 0);
	WREG32(mmMME0_QM_CGM_CFG1, 0);
	WREG32(mmMME2_QM_CGM_CFG, 0);
	WREG32(mmMME2_QM_CGM_CFG1, 0);

	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);

		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
	}
}

/*
 * gaudi_enable_timestamp() - restart the PSOC timestamp counter from zero:
 * disable it, clear the two counter words at offsets 0xC and 0x8, then
 * enable it again. Register addresses are given relative to CFG_BASE.
 */
static void gaudi_enable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

	/* Zero the lower/upper parts of the 64-bit counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

	/* Enable the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}

/*
 * gaudi_disable_timestamp() - stop the PSOC timestamp counter by writing 0
 * to its control register.
 */
static void gaudi_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}

/*
 * gaudi_halt_engines() - quiesce all engines and disable MSI.
 * @hdev: habanalabs device structure.
 * @hard_reset: not referenced in this function.
 * @fw_reset: when true, the engine stop/stall/disable sequence is skipped
 *            and only MSI is disabled.
 *
 * Sequence when @fw_reset is false: stop the QMAN CPs, sleep, stall the
 * engine cores, sleep, disable the QMANs, then stop the timestamp counter.
 * The sleep length is GAUDI_PLDM_RESET_WAIT_MSEC when hdev->pldm is set and
 * GAUDI_RESET_WAIT_MSEC otherwise.
 */
static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	if (hdev->pldm)
		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;

	if (fw_reset)
		goto skip_engines;

	gaudi_stop_nic_qmans(hdev);
	gaudi_stop_mme_qmans(hdev);
	gaudi_stop_tpc_qmans(hdev);
	gaudi_stop_hbm_dma_qmans(hdev);
	gaudi_stop_pci_dma_qmans(hdev);

	msleep(wait_timeout_ms);

	gaudi_pci_dma_stall(hdev);
	gaudi_hbm_dma_stall(hdev);
	gaudi_tpc_stall(hdev);
	gaudi_mme_stall(hdev);

	msleep(wait_timeout_ms);

	gaudi_disable_nic_qmans(hdev);
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_tpc_qmans(hdev);
	gaudi_disable_hbm_dma_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	gaudi_disable_timestamp(hdev);

skip_engines:
	gaudi_disable_msi(hdev);
}

/*
 * gaudi_mmu_init() - one-time MMU setup.
 * @hdev: habanalabs device structure.
 *
 * Returns 0 without touching the hardware when hdev->mmu_enable is clear or
 * HW_CAP_MMU is already set. Otherwise it programs the hop0 table address of
 * every ASID below prop->max_asid, configures and triggers the STLB cache
 * invalidation, enables the MMU and sets HW_CAP_MMU.
 *
 * Return: 0 on success, or the error code returned by
 * gaudi_mmu_update_asid_hop0_addr() for the first ASID that fails.
 */
static int gaudi_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (!hdev->mmu_enable)
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	for (i = 0 ; i < prop->max_asid ; i++) {
		/* hop0 tables are laid out back to back, one per ASID */
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->mmu_hop_table_size);

		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			goto err;
		}
	}

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);

	/* mem cache invalidation */
	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);

	hl_mmu_invalidate_cache(hdev, true, 0);

	WREG32(mmMMU_UP_MMU_ENABLE, 1);
	WREG32(mmMMU_UP_SPI_MASK, 0xF);

	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);

	/*
	 * The H/W expects the first PI after init to be 1. After wraparound
	 * we'll write 0.
	 */
	gaudi->mmu_cache_inv_pi = 1;

	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;

err:
	return rc;
}

/*
 * gaudi_load_firmware_to_device() - load GAUDI_LINUX_FW_FILE through the
 * HBM BAR at LINUX_FW_OFFSET.
 *
 * Return: the value returned by hl_fw_load_fw_to_device().
 */
static int gaudi_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
}

/*
 * gaudi_load_boot_fit_to_device() - load GAUDI_BOOT_FIT_FILE through the
 * SRAM BAR at BOOT_FIT_SRAM_OFFSET.
 *
 * Return: the value returned by hl_fw_load_fw_to_device().
 */
static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
}

/*
 * gaudi_init_dynamic_firmware_loader() - seed the dynamic FW loader with the
 * two register addresses needed before the first FW descriptor is read, and
 * set the boot-loader wait timeout.
 */
static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
{
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	dynamic_loader = &hdev->fw_loader.dynamic_loader;

	/*
	 * here we update initial values for few specific dynamic regs (as
	 * before reading the first descriptor from FW those value has to be
	 * hard-coded) in later stages of the protocol those values will be
	 * updated automatically by reading the FW descriptor so data there
	 * will always be up-to-date
	 */
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	/* cpu_dyn_regs fields are stored little-endian */
	dyn_regs->kmd_msg_to_cpu =
				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host =
				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);

	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
}

/*
 * gaudi_init_static_firmware_loader() - fill the static FW loader with the
 * Gaudi register addresses, version-string offset limits, SRAM offset mask
 * and CPU reset wait time.
 */
static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
{
	struct static_fw_load_mgr *static_loader;

	static_loader = &hdev->fw_loader.static_loader;

3800 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3801 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3802 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU; 3803 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST; 3804 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3805 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0; 3806 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1; 3807 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0; 3808 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1; 3809 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET; 3810 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET; 3811 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR)); 3812 static_loader->cpu_reset_wait_msec = hdev->pldm ? 3813 GAUDI_PLDM_RESET_WAIT_MSEC : 3814 GAUDI_CPU_RESET_WAIT_MSEC; 3815 } 3816 3817 static void gaudi_init_firmware_preload_params(struct hl_device *hdev) 3818 { 3819 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 3820 3821 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3822 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 3823 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 3824 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 3825 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 3826 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3827 } 3828 3829 static void gaudi_init_firmware_loader(struct hl_device *hdev) 3830 { 3831 struct asic_fixed_properties *prop = &hdev->asic_prop; 3832 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 3833 3834 /* fill common fields */ 3835 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 3836 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE; 3837 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE; 3838 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC; 3839 
fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3840 fw_loader->skip_bmc = !hdev->bmc_enable; 3841 fw_loader->sram_bar_id = SRAM_BAR_ID; 3842 fw_loader->dram_bar_id = HBM_BAR_ID; 3843 3844 if (prop->dynamic_fw_load) 3845 gaudi_init_dynamic_firmware_loader(hdev); 3846 else 3847 gaudi_init_static_firmware_loader(hdev); 3848 } 3849 3850 static int gaudi_init_cpu(struct hl_device *hdev) 3851 { 3852 struct gaudi_device *gaudi = hdev->asic_specific; 3853 int rc; 3854 3855 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 3856 return 0; 3857 3858 if (gaudi->hw_cap_initialized & HW_CAP_CPU) 3859 return 0; 3860 3861 /* 3862 * The device CPU works with 40 bits addresses. 3863 * This register sets the extension to 50 bits. 3864 */ 3865 if (!hdev->asic_prop.fw_security_enabled) 3866 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 3867 3868 rc = hl_fw_init_cpu(hdev); 3869 3870 if (rc) 3871 return rc; 3872 3873 gaudi->hw_cap_initialized |= HW_CAP_CPU; 3874 3875 return 0; 3876 } 3877 3878 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3879 { 3880 struct cpu_dyn_regs *dyn_regs = 3881 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3882 struct asic_fixed_properties *prop = &hdev->asic_prop; 3883 struct gaudi_device *gaudi = hdev->asic_specific; 3884 u32 status, irq_handler_offset; 3885 struct hl_eq *eq; 3886 struct hl_hw_queue *cpu_pq = 3887 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 3888 int err; 3889 3890 if (!hdev->cpu_queues_enable) 3891 return 0; 3892 3893 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 3894 return 0; 3895 3896 eq = &hdev->event_queue; 3897 3898 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 3899 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 3900 3901 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 3902 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 3903 3904 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 3905 
lower_32_bits(hdev->cpu_accessible_dma_address)); 3906 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 3907 upper_32_bits(hdev->cpu_accessible_dma_address)); 3908 3909 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 3910 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 3911 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 3912 3913 /* Used for EQ CI */ 3914 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 3915 3916 WREG32(mmCPU_IF_PF_PQ_PI, 0); 3917 3918 if (gaudi->multi_msi_mode) 3919 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); 3920 else 3921 WREG32(mmCPU_IF_QUEUE_INIT, 3922 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 3923 3924 irq_handler_offset = prop->gic_interrupts_enable ? 3925 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3926 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 3927 3928 WREG32(irq_handler_offset, 3929 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 3930 3931 err = hl_poll_timeout( 3932 hdev, 3933 mmCPU_IF_QUEUE_INIT, 3934 status, 3935 (status == PQ_INIT_STATUS_READY_FOR_HOST), 3936 1000, 3937 cpu_timeout); 3938 3939 if (err) { 3940 dev_err(hdev->dev, 3941 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 3942 return -EIO; 3943 } 3944 3945 /* update FW application security bits */ 3946 if (prop->fw_cpu_boot_dev_sts0_valid) 3947 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 3948 if (prop->fw_cpu_boot_dev_sts1_valid) 3949 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 3950 3951 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 3952 return 0; 3953 } 3954 3955 static void gaudi_pre_hw_init(struct hl_device *hdev) 3956 { 3957 /* Perform read from the device to make sure device is up */ 3958 RREG32(mmHW_STATE); 3959 3960 if (!hdev->asic_prop.fw_security_enabled) { 3961 /* Set the access through PCI bars (Linux driver only) as 3962 * secured 3963 */ 3964 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 3965 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 3966 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 3967 3968 /* Perform read to flush the waiting writes to ensure 
3969 * configuration was set in the device 3970 */ 3971 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 3972 } 3973 3974 /* 3975 * Let's mark in the H/W that we have reached this point. We check 3976 * this value in the reset_before_init function to understand whether 3977 * we need to reset the chip before doing H/W init. This register is 3978 * cleared by the H/W upon H/W reset 3979 */ 3980 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 3981 } 3982 3983 static int gaudi_hw_init(struct hl_device *hdev) 3984 { 3985 struct gaudi_device *gaudi = hdev->asic_specific; 3986 int rc; 3987 3988 gaudi_pre_hw_init(hdev); 3989 3990 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 3991 * So we set it here and if anyone tries to move it later to 3992 * a different address, there will be an error 3993 */ 3994 if (hdev->asic_prop.iatu_done_by_fw) 3995 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE; 3996 3997 /* 3998 * Before pushing u-boot/linux to device, need to set the hbm bar to 3999 * base address of dram 4000 */ 4001 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 4002 dev_err(hdev->dev, 4003 "failed to map HBM bar to DRAM base address\n"); 4004 return -EIO; 4005 } 4006 4007 rc = gaudi_init_cpu(hdev); 4008 if (rc) { 4009 dev_err(hdev->dev, "failed to initialize CPU\n"); 4010 return rc; 4011 } 4012 4013 /* In case the clock gating was enabled in preboot we need to disable 4014 * it here before touching the MME/TPC registers. 
4015 */ 4016 gaudi_disable_clock_gating(hdev); 4017 4018 /* SRAM scrambler must be initialized after CPU is running from HBM */ 4019 gaudi_init_scrambler_sram(hdev); 4020 4021 /* This is here just in case we are working without CPU */ 4022 gaudi_init_scrambler_hbm(hdev); 4023 4024 gaudi_init_golden_registers(hdev); 4025 4026 rc = gaudi_mmu_init(hdev); 4027 if (rc) 4028 return rc; 4029 4030 gaudi_init_security(hdev); 4031 4032 gaudi_init_pci_dma_qmans(hdev); 4033 4034 gaudi_init_hbm_dma_qmans(hdev); 4035 4036 gaudi_init_mme_qmans(hdev); 4037 4038 gaudi_init_tpc_qmans(hdev); 4039 4040 gaudi_init_nic_qmans(hdev); 4041 4042 gaudi_enable_timestamp(hdev); 4043 4044 /* MSI must be enabled before CPU queues and NIC are initialized */ 4045 rc = gaudi_enable_msi(hdev); 4046 if (rc) 4047 goto disable_queues; 4048 4049 /* must be called after MSI was enabled */ 4050 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 4051 if (rc) { 4052 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 4053 rc); 4054 goto disable_msi; 4055 } 4056 4057 /* Perform read from the device to flush all configuration */ 4058 RREG32(mmHW_STATE); 4059 4060 return 0; 4061 4062 disable_msi: 4063 gaudi_disable_msi(hdev); 4064 disable_queues: 4065 gaudi_disable_mme_qmans(hdev); 4066 gaudi_disable_pci_dma_qmans(hdev); 4067 4068 return rc; 4069 } 4070 4071 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4072 { 4073 struct cpu_dyn_regs *dyn_regs = 4074 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4075 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset; 4076 struct gaudi_device *gaudi = hdev->asic_specific; 4077 bool driver_performs_reset; 4078 4079 if (!hard_reset) { 4080 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 4081 return; 4082 } 4083 4084 if (hdev->pldm) { 4085 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 4086 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 4087 } else { 4088 reset_timeout_ms = 
GAUDI_RESET_TIMEOUT_MSEC; 4089 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 4090 } 4091 4092 if (fw_reset) { 4093 dev_dbg(hdev->dev, 4094 "Firmware performs HARD reset, going to wait %dms\n", 4095 reset_timeout_ms); 4096 4097 goto skip_reset; 4098 } 4099 4100 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && 4101 !hdev->asic_prop.hard_reset_done_by_fw); 4102 4103 /* Set device to handle FLR by H/W as we will put the device CPU to 4104 * halt mode 4105 */ 4106 if (driver_performs_reset) 4107 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 4108 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 4109 4110 /* If linux is loaded in the device CPU we need to communicate with it 4111 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU 4112 * registers in case of old F/Ws 4113 */ 4114 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { 4115 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4116 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4117 le32_to_cpu(dyn_regs->gic_host_halt_irq); 4118 4119 WREG32(irq_handler_offset, 4120 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); 4121 4122 /* This is a hail-mary attempt to revive the card in the small chance that the 4123 * f/w has experienced a watchdog event, which caused it to return back to preboot. 4124 * In that case, triggering reset through GIC won't help. We need to trigger the 4125 * reset as if Linux wasn't loaded. 4126 * 4127 * We do it only if the reset cause was HB, because that would be the indication 4128 * of such an event. 4129 * 4130 * In case watchdog hasn't expired but we still got HB, then this won't do any 4131 * damage. 
4132 */ 4133 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { 4134 if (hdev->asic_prop.hard_reset_done_by_fw) 4135 hl_fw_ask_hard_reset_without_linux(hdev); 4136 else 4137 hl_fw_ask_halt_machine_without_linux(hdev); 4138 } 4139 } else { 4140 if (hdev->asic_prop.hard_reset_done_by_fw) 4141 hl_fw_ask_hard_reset_without_linux(hdev); 4142 else 4143 hl_fw_ask_halt_machine_without_linux(hdev); 4144 } 4145 4146 if (driver_performs_reset) { 4147 4148 /* Configure the reset registers. Must be done as early as 4149 * possible in case we fail during H/W initialization 4150 */ 4151 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 4152 (CFG_RST_H_DMA_MASK | 4153 CFG_RST_H_MME_MASK | 4154 CFG_RST_H_SM_MASK | 4155 CFG_RST_H_TPC_7_MASK)); 4156 4157 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 4158 4159 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 4160 (CFG_RST_H_HBM_MASK | 4161 CFG_RST_H_TPC_7_MASK | 4162 CFG_RST_H_NIC_MASK | 4163 CFG_RST_H_SM_MASK | 4164 CFG_RST_H_DMA_MASK | 4165 CFG_RST_H_MME_MASK | 4166 CFG_RST_H_CPU_MASK | 4167 CFG_RST_H_MMU_MASK)); 4168 4169 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 4170 (CFG_RST_L_IF_MASK | 4171 CFG_RST_L_PSOC_MASK | 4172 CFG_RST_L_TPC_MASK)); 4173 4174 msleep(cpu_timeout_ms); 4175 4176 /* Tell ASIC not to re-initialize PCIe */ 4177 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 4178 4179 /* Restart BTL/BLR upon hard-reset */ 4180 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 4181 4182 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 4183 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 4184 4185 dev_dbg(hdev->dev, 4186 "Issued HARD reset command, going to wait %dms\n", 4187 reset_timeout_ms); 4188 } else { 4189 dev_dbg(hdev->dev, 4190 "Firmware performs HARD reset, going to wait %dms\n", 4191 reset_timeout_ms); 4192 } 4193 4194 skip_reset: 4195 /* 4196 * After hard reset, we can't poll the BTM_FSM register because the PSOC 4197 * itself is in reset. 
Need to wait until the reset is deasserted 4198 */ 4199 msleep(reset_timeout_ms); 4200 4201 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 4202 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) 4203 dev_err(hdev->dev, 4204 "Timeout while waiting for device to reset 0x%x\n", 4205 status); 4206 4207 if (gaudi) { 4208 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | 4209 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | 4210 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | 4211 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | 4212 HW_CAP_HBM_SCRAMBLER); 4213 4214 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 4215 4216 hdev->device_cpu_is_halted = false; 4217 } 4218 } 4219 4220 static int gaudi_suspend(struct hl_device *hdev) 4221 { 4222 int rc; 4223 4224 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 4225 if (rc) 4226 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 4227 4228 return rc; 4229 } 4230 4231 static int gaudi_resume(struct hl_device *hdev) 4232 { 4233 return gaudi_init_iatu(hdev); 4234 } 4235 4236 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 4237 void *cpu_addr, dma_addr_t dma_addr, size_t size) 4238 { 4239 int rc; 4240 4241 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4242 VM_DONTCOPY | VM_NORESERVE; 4243 4244 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4245 (dma_addr - HOST_PHYS_BASE), size); 4246 if (rc) 4247 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4248 4249 return rc; 4250 } 4251 4252 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 4253 { 4254 struct cpu_dyn_regs *dyn_regs = 4255 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4256 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset; 4257 struct gaudi_device *gaudi = hdev->asic_specific; 4258 bool invalid_queue = false; 4259 int dma_id; 4260 4261 switch (hw_queue_id) { 4262 case 
GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 4263 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 4264 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4265 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4266 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4267 break; 4268 4269 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 4270 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 4271 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4272 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4273 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4274 break; 4275 4276 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 4277 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 4278 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4279 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4280 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4281 break; 4282 4283 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 4284 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 4285 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4286 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4287 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4288 break; 4289 4290 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 4291 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 4292 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4293 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4294 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4295 break; 4296 4297 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 4298 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 4299 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4300 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4301 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4302 break; 4303 4304 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 4305 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 4306 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4307 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4308 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4309 break; 4310 4311 case 
GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 4312 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6]; 4313 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4314 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4315 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4316 break; 4317 4318 case GAUDI_QUEUE_ID_CPU_PQ: 4319 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4320 db_reg_offset = mmCPU_IF_PF_PQ_PI; 4321 else 4322 invalid_queue = true; 4323 break; 4324 4325 case GAUDI_QUEUE_ID_MME_0_0: 4326 db_reg_offset = mmMME2_QM_PQ_PI_0; 4327 break; 4328 4329 case GAUDI_QUEUE_ID_MME_0_1: 4330 db_reg_offset = mmMME2_QM_PQ_PI_1; 4331 break; 4332 4333 case GAUDI_QUEUE_ID_MME_0_2: 4334 db_reg_offset = mmMME2_QM_PQ_PI_2; 4335 break; 4336 4337 case GAUDI_QUEUE_ID_MME_0_3: 4338 db_reg_offset = mmMME2_QM_PQ_PI_3; 4339 break; 4340 4341 case GAUDI_QUEUE_ID_MME_1_0: 4342 db_reg_offset = mmMME0_QM_PQ_PI_0; 4343 break; 4344 4345 case GAUDI_QUEUE_ID_MME_1_1: 4346 db_reg_offset = mmMME0_QM_PQ_PI_1; 4347 break; 4348 4349 case GAUDI_QUEUE_ID_MME_1_2: 4350 db_reg_offset = mmMME0_QM_PQ_PI_2; 4351 break; 4352 4353 case GAUDI_QUEUE_ID_MME_1_3: 4354 db_reg_offset = mmMME0_QM_PQ_PI_3; 4355 break; 4356 4357 case GAUDI_QUEUE_ID_TPC_0_0: 4358 db_reg_offset = mmTPC0_QM_PQ_PI_0; 4359 break; 4360 4361 case GAUDI_QUEUE_ID_TPC_0_1: 4362 db_reg_offset = mmTPC0_QM_PQ_PI_1; 4363 break; 4364 4365 case GAUDI_QUEUE_ID_TPC_0_2: 4366 db_reg_offset = mmTPC0_QM_PQ_PI_2; 4367 break; 4368 4369 case GAUDI_QUEUE_ID_TPC_0_3: 4370 db_reg_offset = mmTPC0_QM_PQ_PI_3; 4371 break; 4372 4373 case GAUDI_QUEUE_ID_TPC_1_0: 4374 db_reg_offset = mmTPC1_QM_PQ_PI_0; 4375 break; 4376 4377 case GAUDI_QUEUE_ID_TPC_1_1: 4378 db_reg_offset = mmTPC1_QM_PQ_PI_1; 4379 break; 4380 4381 case GAUDI_QUEUE_ID_TPC_1_2: 4382 db_reg_offset = mmTPC1_QM_PQ_PI_2; 4383 break; 4384 4385 case GAUDI_QUEUE_ID_TPC_1_3: 4386 db_reg_offset = mmTPC1_QM_PQ_PI_3; 4387 break; 4388 4389 case GAUDI_QUEUE_ID_TPC_2_0: 4390 db_reg_offset = mmTPC2_QM_PQ_PI_0; 4391 break; 4392 
4393 case GAUDI_QUEUE_ID_TPC_2_1: 4394 db_reg_offset = mmTPC2_QM_PQ_PI_1; 4395 break; 4396 4397 case GAUDI_QUEUE_ID_TPC_2_2: 4398 db_reg_offset = mmTPC2_QM_PQ_PI_2; 4399 break; 4400 4401 case GAUDI_QUEUE_ID_TPC_2_3: 4402 db_reg_offset = mmTPC2_QM_PQ_PI_3; 4403 break; 4404 4405 case GAUDI_QUEUE_ID_TPC_3_0: 4406 db_reg_offset = mmTPC3_QM_PQ_PI_0; 4407 break; 4408 4409 case GAUDI_QUEUE_ID_TPC_3_1: 4410 db_reg_offset = mmTPC3_QM_PQ_PI_1; 4411 break; 4412 4413 case GAUDI_QUEUE_ID_TPC_3_2: 4414 db_reg_offset = mmTPC3_QM_PQ_PI_2; 4415 break; 4416 4417 case GAUDI_QUEUE_ID_TPC_3_3: 4418 db_reg_offset = mmTPC3_QM_PQ_PI_3; 4419 break; 4420 4421 case GAUDI_QUEUE_ID_TPC_4_0: 4422 db_reg_offset = mmTPC4_QM_PQ_PI_0; 4423 break; 4424 4425 case GAUDI_QUEUE_ID_TPC_4_1: 4426 db_reg_offset = mmTPC4_QM_PQ_PI_1; 4427 break; 4428 4429 case GAUDI_QUEUE_ID_TPC_4_2: 4430 db_reg_offset = mmTPC4_QM_PQ_PI_2; 4431 break; 4432 4433 case GAUDI_QUEUE_ID_TPC_4_3: 4434 db_reg_offset = mmTPC4_QM_PQ_PI_3; 4435 break; 4436 4437 case GAUDI_QUEUE_ID_TPC_5_0: 4438 db_reg_offset = mmTPC5_QM_PQ_PI_0; 4439 break; 4440 4441 case GAUDI_QUEUE_ID_TPC_5_1: 4442 db_reg_offset = mmTPC5_QM_PQ_PI_1; 4443 break; 4444 4445 case GAUDI_QUEUE_ID_TPC_5_2: 4446 db_reg_offset = mmTPC5_QM_PQ_PI_2; 4447 break; 4448 4449 case GAUDI_QUEUE_ID_TPC_5_3: 4450 db_reg_offset = mmTPC5_QM_PQ_PI_3; 4451 break; 4452 4453 case GAUDI_QUEUE_ID_TPC_6_0: 4454 db_reg_offset = mmTPC6_QM_PQ_PI_0; 4455 break; 4456 4457 case GAUDI_QUEUE_ID_TPC_6_1: 4458 db_reg_offset = mmTPC6_QM_PQ_PI_1; 4459 break; 4460 4461 case GAUDI_QUEUE_ID_TPC_6_2: 4462 db_reg_offset = mmTPC6_QM_PQ_PI_2; 4463 break; 4464 4465 case GAUDI_QUEUE_ID_TPC_6_3: 4466 db_reg_offset = mmTPC6_QM_PQ_PI_3; 4467 break; 4468 4469 case GAUDI_QUEUE_ID_TPC_7_0: 4470 db_reg_offset = mmTPC7_QM_PQ_PI_0; 4471 break; 4472 4473 case GAUDI_QUEUE_ID_TPC_7_1: 4474 db_reg_offset = mmTPC7_QM_PQ_PI_1; 4475 break; 4476 4477 case GAUDI_QUEUE_ID_TPC_7_2: 4478 db_reg_offset = mmTPC7_QM_PQ_PI_2; 4479 break; 
4480 4481 case GAUDI_QUEUE_ID_TPC_7_3: 4482 db_reg_offset = mmTPC7_QM_PQ_PI_3; 4483 break; 4484 4485 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3: 4486 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0)) 4487 invalid_queue = true; 4488 4489 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4490 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off; 4491 break; 4492 4493 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3: 4494 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1)) 4495 invalid_queue = true; 4496 4497 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4498 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off; 4499 break; 4500 4501 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3: 4502 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2)) 4503 invalid_queue = true; 4504 4505 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4506 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off; 4507 break; 4508 4509 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3: 4510 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3)) 4511 invalid_queue = true; 4512 4513 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4514 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off; 4515 break; 4516 4517 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3: 4518 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4)) 4519 invalid_queue = true; 4520 4521 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4522 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off; 4523 break; 4524 4525 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3: 4526 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5)) 4527 invalid_queue = true; 4528 4529 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4530 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off; 4531 break; 4532 4533 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3: 4534 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6)) 4535 invalid_queue = true; 4536 4537 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4538 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off; 4539 break; 4540 4541 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3: 4542 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7)) 4543 
invalid_queue = true; 4544 4545 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4546 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off; 4547 break; 4548 4549 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3: 4550 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8)) 4551 invalid_queue = true; 4552 4553 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4554 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off; 4555 break; 4556 4557 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3: 4558 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9)) 4559 invalid_queue = true; 4560 4561 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4562 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off; 4563 break; 4564 4565 default: 4566 invalid_queue = true; 4567 } 4568 4569 if (invalid_queue) { 4570 /* Should never get here */ 4571 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", 4572 hw_queue_id); 4573 return; 4574 } 4575 4576 db_value = pi; 4577 4578 /* ring the doorbell */ 4579 WREG32(db_reg_offset, db_value); 4580 4581 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { 4582 /* make sure device CPU will read latest data from host */ 4583 mb(); 4584 4585 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
4586 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4587 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 4588 4589 WREG32(irq_handler_offset, 4590 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 4591 } 4592 } 4593 4594 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, 4595 struct hl_bd *bd) 4596 { 4597 __le64 *pbd = (__le64 *) bd; 4598 4599 /* The QMANs are on the host memory so a simple copy suffice */ 4600 pqe[0] = pbd[0]; 4601 pqe[1] = pbd[1]; 4602 } 4603 4604 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, 4605 dma_addr_t *dma_handle, gfp_t flags) 4606 { 4607 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, 4608 dma_handle, flags); 4609 4610 /* Shift to the device's base physical address of host memory */ 4611 if (kernel_addr) 4612 *dma_handle += HOST_PHYS_BASE; 4613 4614 return kernel_addr; 4615 } 4616 4617 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, 4618 void *cpu_addr, dma_addr_t dma_handle) 4619 { 4620 /* Cancel the device's base physical address of host memory */ 4621 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; 4622 4623 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); 4624 } 4625 4626 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) 4627 { 4628 struct asic_fixed_properties *prop = &hdev->asic_prop; 4629 u64 cur_addr = prop->dram_user_base_address; 4630 u32 chunk_size, busy; 4631 int rc, dma_id; 4632 4633 while (cur_addr < prop->dram_end_address) { 4634 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4635 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4636 4637 chunk_size = 4638 min((u64)SZ_2G, prop->dram_end_address - cur_addr); 4639 4640 dev_dbg(hdev->dev, 4641 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", 4642 cur_addr, cur_addr + chunk_size); 4643 4644 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 4645 lower_32_bits(val)); 4646 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 4647 upper_32_bits(val)); 4648 
WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, 4649 lower_32_bits(cur_addr)); 4650 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, 4651 upper_32_bits(cur_addr)); 4652 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, 4653 chunk_size); 4654 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 4655 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) | 4656 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT))); 4657 4658 cur_addr += chunk_size; 4659 4660 if (cur_addr == prop->dram_end_address) 4661 break; 4662 } 4663 4664 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4665 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4666 4667 rc = hl_poll_timeout( 4668 hdev, 4669 mmDMA0_CORE_STS0 + dma_offset, 4670 busy, 4671 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 4672 1000, 4673 HBM_SCRUBBING_TIMEOUT_US); 4674 4675 if (rc) { 4676 dev_err(hdev->dev, 4677 "DMA Timeout during HBM scrubbing of DMA #%d\n", 4678 dma_id); 4679 return -EIO; 4680 } 4681 } 4682 } 4683 4684 return 0; 4685 } 4686 4687 static int gaudi_scrub_device_mem(struct hl_device *hdev) 4688 { 4689 struct asic_fixed_properties *prop = &hdev->asic_prop; 4690 u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US : 4691 min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US); 4692 u64 addr, size, val = hdev->memory_scrub_val; 4693 ktime_t timeout; 4694 int rc = 0; 4695 4696 if (!hdev->memory_scrub) 4697 return 0; 4698 4699 timeout = ktime_add_us(ktime_get(), wait_to_idle_time); 4700 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 4701 if (ktime_compare(ktime_get(), timeout) > 0) { 4702 dev_err(hdev->dev, "waiting for idle timeout\n"); 4703 return -ETIMEDOUT; 4704 } 4705 usleep_range((1000 >> 2) + 1, 1000); 4706 } 4707 4708 /* Scrub SRAM */ 4709 addr = prop->sram_user_base_address; 4710 size = hdev->pldm ? 
0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; 4711 4712 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4713 addr, addr + size, val); 4714 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4715 if (rc) { 4716 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc); 4717 return rc; 4718 } 4719 4720 /* Scrub HBM using all DMA channels in parallel */ 4721 rc = gaudi_scrub_device_dram(hdev, val); 4722 if (rc) { 4723 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc); 4724 return rc; 4725 } 4726 4727 return 0; 4728 } 4729 4730 static void *gaudi_get_int_queue_base(struct hl_device *hdev, 4731 u32 queue_id, dma_addr_t *dma_handle, 4732 u16 *queue_len) 4733 { 4734 struct gaudi_device *gaudi = hdev->asic_specific; 4735 struct gaudi_internal_qman_info *q; 4736 4737 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 4738 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 4739 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 4740 return NULL; 4741 } 4742 4743 q = &gaudi->internal_qmans[queue_id]; 4744 *dma_handle = q->pq_dma_addr; 4745 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; 4746 4747 return q->pq_kernel_addr; 4748 } 4749 4750 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 4751 u16 len, u32 timeout, u64 *result) 4752 { 4753 struct gaudi_device *gaudi = hdev->asic_specific; 4754 4755 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 4756 if (result) 4757 *result = 0; 4758 return 0; 4759 } 4760 4761 if (!timeout) 4762 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 4763 4764 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 4765 timeout, result); 4766 } 4767 4768 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 4769 { 4770 struct packet_msg_prot *fence_pkt; 4771 dma_addr_t pkt_dma_addr; 4772 u32 fence_val, tmp, timeout_usec; 4773 dma_addr_t fence_dma_addr; 4774 u32 *fence_ptr; 4775 int rc; 4776 4777 if (hdev->pldm) 4778 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 4779 else 4780 
timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 4781 4782 fence_val = GAUDI_QMAN0_FENCE_VAL; 4783 4784 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 4785 if (!fence_ptr) { 4786 dev_err(hdev->dev, 4787 "Failed to allocate memory for H/W queue %d testing\n", 4788 hw_queue_id); 4789 return -ENOMEM; 4790 } 4791 4792 *fence_ptr = 0; 4793 4794 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL, 4795 &pkt_dma_addr); 4796 if (!fence_pkt) { 4797 dev_err(hdev->dev, 4798 "Failed to allocate packet for H/W queue %d testing\n", 4799 hw_queue_id); 4800 rc = -ENOMEM; 4801 goto free_fence_ptr; 4802 } 4803 4804 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4805 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4806 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4807 4808 fence_pkt->ctl = cpu_to_le32(tmp); 4809 fence_pkt->value = cpu_to_le32(fence_val); 4810 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4811 4812 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 4813 sizeof(struct packet_msg_prot), 4814 pkt_dma_addr); 4815 if (rc) { 4816 dev_err(hdev->dev, 4817 "Failed to send fence packet to H/W queue %d\n", 4818 hw_queue_id); 4819 goto free_pkt; 4820 } 4821 4822 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 4823 1000, timeout_usec, true); 4824 4825 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 4826 4827 if (rc == -ETIMEDOUT) { 4828 dev_err(hdev->dev, 4829 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 4830 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 4831 rc = -EIO; 4832 } 4833 4834 free_pkt: 4835 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr); 4836 free_fence_ptr: 4837 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 4838 return rc; 4839 } 4840 4841 static int gaudi_test_cpu_queue(struct hl_device *hdev) 4842 { 4843 struct gaudi_device *gaudi = hdev->asic_specific; 4844 4845 /* 4846 * check capability here as send_cpu_message() won't 
update the result 4847 * value if no capability 4848 */ 4849 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 4850 return 0; 4851 4852 return hl_fw_test_cpu_queue(hdev); 4853 } 4854 4855 static int gaudi_test_queues(struct hl_device *hdev) 4856 { 4857 int i, rc, ret_val = 0; 4858 4859 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 4860 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 4861 rc = gaudi_test_queue(hdev, i); 4862 if (rc) 4863 ret_val = -EINVAL; 4864 } 4865 } 4866 4867 rc = gaudi_test_cpu_queue(hdev); 4868 if (rc) 4869 ret_val = -EINVAL; 4870 4871 return ret_val; 4872 } 4873 4874 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 4875 gfp_t mem_flags, dma_addr_t *dma_handle) 4876 { 4877 void *kernel_addr; 4878 4879 if (size > GAUDI_DMA_POOL_BLK_SIZE) 4880 return NULL; 4881 4882 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 4883 4884 /* Shift to the device's base physical address of host memory */ 4885 if (kernel_addr) 4886 *dma_handle += HOST_PHYS_BASE; 4887 4888 return kernel_addr; 4889 } 4890 4891 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 4892 dma_addr_t dma_addr) 4893 { 4894 /* Cancel the device's base physical address of host memory */ 4895 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 4896 4897 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 4898 } 4899 4900 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 4901 size_t size, dma_addr_t *dma_handle) 4902 { 4903 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 4904 } 4905 4906 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 4907 size_t size, void *vaddr) 4908 { 4909 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 4910 } 4911 4912 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) 4913 { 4914 struct scatterlist *sg, *sg_next_iter; 4915 u32 count, dma_desc_cnt; 4916 u64 len, len_next; 4917 
dma_addr_t addr, addr_next; 4918 4919 dma_desc_cnt = 0; 4920 4921 for_each_sgtable_dma_sg(sgt, sg, count) { 4922 len = sg_dma_len(sg); 4923 addr = sg_dma_address(sg); 4924 4925 if (len == 0) 4926 break; 4927 4928 while ((count + 1) < sgt->nents) { 4929 sg_next_iter = sg_next(sg); 4930 len_next = sg_dma_len(sg_next_iter); 4931 addr_next = sg_dma_address(sg_next_iter); 4932 4933 if (len_next == 0) 4934 break; 4935 4936 if ((addr + len == addr_next) && 4937 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 4938 len += len_next; 4939 count++; 4940 sg = sg_next_iter; 4941 } else { 4942 break; 4943 } 4944 } 4945 4946 dma_desc_cnt++; 4947 } 4948 4949 return dma_desc_cnt * sizeof(struct packet_lin_dma); 4950 } 4951 4952 static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 4953 struct hl_cs_parser *parser, 4954 struct packet_lin_dma *user_dma_pkt, 4955 u64 addr, enum dma_data_direction dir) 4956 { 4957 struct hl_userptr *userptr; 4958 int rc; 4959 4960 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4961 parser->job_userptr_list, &userptr)) 4962 goto already_pinned; 4963 4964 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 4965 if (!userptr) 4966 return -ENOMEM; 4967 4968 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4969 userptr); 4970 if (rc) 4971 goto free_userptr; 4972 4973 list_add_tail(&userptr->job_node, parser->job_userptr_list); 4974 4975 rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); 4976 if (rc) { 4977 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 4978 goto unpin_memory; 4979 } 4980 4981 userptr->dma_mapped = true; 4982 userptr->dir = dir; 4983 4984 already_pinned: 4985 parser->patched_cb_size += 4986 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 4987 4988 return 0; 4989 4990 unpin_memory: 4991 list_del(&userptr->job_node); 4992 hl_unpin_host_memory(hdev, userptr); 4993 free_userptr: 4994 kfree(userptr); 4995 return rc; 4996 } 4997 4998 static int 
gaudi_validate_dma_pkt_host(struct hl_device *hdev, 4999 struct hl_cs_parser *parser, 5000 struct packet_lin_dma *user_dma_pkt, 5001 bool src_in_host) 5002 { 5003 enum dma_data_direction dir; 5004 bool skip_host_mem_pin = false, user_memset; 5005 u64 addr; 5006 int rc = 0; 5007 5008 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 5009 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5010 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5011 5012 if (src_in_host) { 5013 if (user_memset) 5014 skip_host_mem_pin = true; 5015 5016 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 5017 dir = DMA_TO_DEVICE; 5018 addr = le64_to_cpu(user_dma_pkt->src_addr); 5019 } else { 5020 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 5021 dir = DMA_FROM_DEVICE; 5022 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 5023 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 5024 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 5025 } 5026 5027 if (skip_host_mem_pin) 5028 parser->patched_cb_size += sizeof(*user_dma_pkt); 5029 else 5030 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 5031 addr, dir); 5032 5033 return rc; 5034 } 5035 5036 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 5037 struct hl_cs_parser *parser, 5038 struct packet_lin_dma *user_dma_pkt) 5039 { 5040 bool src_in_host = false; 5041 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 5042 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 5043 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 5044 5045 dev_dbg(hdev->dev, "DMA packet details:\n"); 5046 dev_dbg(hdev->dev, "source == 0x%llx\n", 5047 le64_to_cpu(user_dma_pkt->src_addr)); 5048 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 5049 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); 5050 5051 /* 5052 * Special handling for DMA with size 0. 
Bypass all validations 5053 * because no transactions will be done except for WR_COMP, which 5054 * is not a security issue 5055 */ 5056 if (!le32_to_cpu(user_dma_pkt->tsize)) { 5057 parser->patched_cb_size += sizeof(*user_dma_pkt); 5058 return 0; 5059 } 5060 5061 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5062 src_in_host = true; 5063 5064 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 5065 src_in_host); 5066 } 5067 5068 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 5069 struct hl_cs_parser *parser, 5070 struct packet_load_and_exe *user_pkt) 5071 { 5072 u32 cfg; 5073 5074 cfg = le32_to_cpu(user_pkt->cfg); 5075 5076 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 5077 dev_err(hdev->dev, 5078 "User not allowed to use Load and Execute\n"); 5079 return -EPERM; 5080 } 5081 5082 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 5083 5084 return 0; 5085 } 5086 5087 static int gaudi_validate_cb(struct hl_device *hdev, 5088 struct hl_cs_parser *parser, bool is_mmu) 5089 { 5090 u32 cb_parsed_length = 0; 5091 int rc = 0; 5092 5093 parser->patched_cb_size = 0; 5094 5095 /* cb_user_size is more than 0 so loop will always be executed */ 5096 while (cb_parsed_length < parser->user_cb_size) { 5097 enum packet_id pkt_id; 5098 u16 pkt_size; 5099 struct gaudi_packet *user_pkt; 5100 5101 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5102 5103 pkt_id = (enum packet_id) ( 5104 (le64_to_cpu(user_pkt->header) & 5105 PACKET_HEADER_PACKET_ID_MASK) >> 5106 PACKET_HEADER_PACKET_ID_SHIFT); 5107 5108 if (!validate_packet_id(pkt_id)) { 5109 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5110 rc = -EINVAL; 5111 break; 5112 } 5113 5114 pkt_size = gaudi_packet_sizes[pkt_id]; 5115 cb_parsed_length += pkt_size; 5116 if (cb_parsed_length > parser->user_cb_size) { 5117 dev_err(hdev->dev, 5118 "packet 0x%x is out of CB boundary\n", pkt_id); 5119 rc = -EINVAL; 5120 break; 5121 } 5122 5123 switch (pkt_id) { 5124 case 
PACKET_MSG_PROT: 5125 dev_err(hdev->dev, 5126 "User not allowed to use MSG_PROT\n"); 5127 rc = -EPERM; 5128 break; 5129 5130 case PACKET_CP_DMA: 5131 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5132 rc = -EPERM; 5133 break; 5134 5135 case PACKET_STOP: 5136 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5137 rc = -EPERM; 5138 break; 5139 5140 case PACKET_WREG_BULK: 5141 dev_err(hdev->dev, 5142 "User not allowed to use WREG_BULK\n"); 5143 rc = -EPERM; 5144 break; 5145 5146 case PACKET_LOAD_AND_EXE: 5147 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 5148 (struct packet_load_and_exe *) user_pkt); 5149 break; 5150 5151 case PACKET_LIN_DMA: 5152 parser->contains_dma_pkt = true; 5153 if (is_mmu) 5154 parser->patched_cb_size += pkt_size; 5155 else 5156 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 5157 (struct packet_lin_dma *) user_pkt); 5158 break; 5159 5160 case PACKET_WREG_32: 5161 case PACKET_MSG_LONG: 5162 case PACKET_MSG_SHORT: 5163 case PACKET_REPEAT: 5164 case PACKET_FENCE: 5165 case PACKET_NOP: 5166 case PACKET_ARB_POINT: 5167 parser->patched_cb_size += pkt_size; 5168 break; 5169 5170 default: 5171 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5172 pkt_id); 5173 rc = -EINVAL; 5174 break; 5175 } 5176 5177 if (rc) 5178 break; 5179 } 5180 5181 /* 5182 * The new CB should have space at the end for two MSG_PROT packets: 5183 * 1. Optional NOP padding for cacheline alignment 5184 * 2. A packet that will act as a completion packet 5185 * 3. 
A packet that will generate MSI interrupt 5186 */ 5187 if (parser->completion) 5188 parser->patched_cb_size += gaudi_get_patched_cb_extra_size( 5189 parser->patched_cb_size); 5190 5191 return rc; 5192 } 5193 5194 static int gaudi_patch_dma_packet(struct hl_device *hdev, 5195 struct hl_cs_parser *parser, 5196 struct packet_lin_dma *user_dma_pkt, 5197 struct packet_lin_dma *new_dma_pkt, 5198 u32 *new_dma_pkt_size) 5199 { 5200 struct hl_userptr *userptr; 5201 struct scatterlist *sg, *sg_next_iter; 5202 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 5203 u64 len, len_next; 5204 dma_addr_t dma_addr, dma_addr_next; 5205 u64 device_memory_addr, addr; 5206 enum dma_data_direction dir; 5207 struct sg_table *sgt; 5208 bool src_in_host = false; 5209 bool skip_host_mem_pin = false; 5210 bool user_memset; 5211 5212 ctl = le32_to_cpu(user_dma_pkt->ctl); 5213 5214 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5215 src_in_host = true; 5216 5217 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5218 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5219 5220 if (src_in_host) { 5221 addr = le64_to_cpu(user_dma_pkt->src_addr); 5222 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 5223 dir = DMA_TO_DEVICE; 5224 if (user_memset) 5225 skip_host_mem_pin = true; 5226 } else { 5227 addr = le64_to_cpu(user_dma_pkt->dst_addr); 5228 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 5229 dir = DMA_FROM_DEVICE; 5230 } 5231 5232 if ((!skip_host_mem_pin) && 5233 (!hl_userptr_is_pinned(hdev, addr, 5234 le32_to_cpu(user_dma_pkt->tsize), 5235 parser->job_userptr_list, &userptr))) { 5236 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 5237 addr, user_dma_pkt->tsize); 5238 return -EFAULT; 5239 } 5240 5241 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 5242 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 5243 *new_dma_pkt_size = sizeof(*user_dma_pkt); 5244 return 0; 5245 } 5246 5247 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5248 5249 sgt = 
userptr->sgt; 5250 dma_desc_cnt = 0; 5251 5252 for_each_sgtable_dma_sg(sgt, sg, count) { 5253 len = sg_dma_len(sg); 5254 dma_addr = sg_dma_address(sg); 5255 5256 if (len == 0) 5257 break; 5258 5259 while ((count + 1) < sgt->nents) { 5260 sg_next_iter = sg_next(sg); 5261 len_next = sg_dma_len(sg_next_iter); 5262 dma_addr_next = sg_dma_address(sg_next_iter); 5263 5264 if (len_next == 0) 5265 break; 5266 5267 if ((dma_addr + len == dma_addr_next) && 5268 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5269 len += len_next; 5270 count++; 5271 sg = sg_next_iter; 5272 } else { 5273 break; 5274 } 5275 } 5276 5277 ctl = le32_to_cpu(user_dma_pkt->ctl); 5278 if (likely(dma_desc_cnt)) 5279 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 5280 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5281 new_dma_pkt->ctl = cpu_to_le32(ctl); 5282 new_dma_pkt->tsize = cpu_to_le32(len); 5283 5284 if (dir == DMA_TO_DEVICE) { 5285 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 5286 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 5287 } else { 5288 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 5289 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 5290 } 5291 5292 if (!user_memset) 5293 device_memory_addr += len; 5294 dma_desc_cnt++; 5295 new_dma_pkt++; 5296 } 5297 5298 if (!dma_desc_cnt) { 5299 dev_err(hdev->dev, 5300 "Error of 0 SG entries when patching DMA packet\n"); 5301 return -EFAULT; 5302 } 5303 5304 /* Fix the last dma packet - wrcomp must be as user set it */ 5305 new_dma_pkt--; 5306 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 5307 5308 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); 5309 5310 return 0; 5311 } 5312 5313 static int gaudi_patch_cb(struct hl_device *hdev, 5314 struct hl_cs_parser *parser) 5315 { 5316 u32 cb_parsed_length = 0; 5317 u32 cb_patched_cur_length = 0; 5318 int rc = 0; 5319 5320 /* cb_user_size is more than 0 so loop will always be executed */ 5321 while (cb_parsed_length < parser->user_cb_size) { 5322 enum packet_id pkt_id; 5323 u16 
pkt_size; 5324 u32 new_pkt_size = 0; 5325 struct gaudi_packet *user_pkt, *kernel_pkt; 5326 5327 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5328 kernel_pkt = parser->patched_cb->kernel_address + 5329 cb_patched_cur_length; 5330 5331 pkt_id = (enum packet_id) ( 5332 (le64_to_cpu(user_pkt->header) & 5333 PACKET_HEADER_PACKET_ID_MASK) >> 5334 PACKET_HEADER_PACKET_ID_SHIFT); 5335 5336 if (!validate_packet_id(pkt_id)) { 5337 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5338 rc = -EINVAL; 5339 break; 5340 } 5341 5342 pkt_size = gaudi_packet_sizes[pkt_id]; 5343 cb_parsed_length += pkt_size; 5344 if (cb_parsed_length > parser->user_cb_size) { 5345 dev_err(hdev->dev, 5346 "packet 0x%x is out of CB boundary\n", pkt_id); 5347 rc = -EINVAL; 5348 break; 5349 } 5350 5351 switch (pkt_id) { 5352 case PACKET_LIN_DMA: 5353 rc = gaudi_patch_dma_packet(hdev, parser, 5354 (struct packet_lin_dma *) user_pkt, 5355 (struct packet_lin_dma *) kernel_pkt, 5356 &new_pkt_size); 5357 cb_patched_cur_length += new_pkt_size; 5358 break; 5359 5360 case PACKET_MSG_PROT: 5361 dev_err(hdev->dev, 5362 "User not allowed to use MSG_PROT\n"); 5363 rc = -EPERM; 5364 break; 5365 5366 case PACKET_CP_DMA: 5367 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5368 rc = -EPERM; 5369 break; 5370 5371 case PACKET_STOP: 5372 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5373 rc = -EPERM; 5374 break; 5375 5376 case PACKET_WREG_32: 5377 case PACKET_WREG_BULK: 5378 case PACKET_MSG_LONG: 5379 case PACKET_MSG_SHORT: 5380 case PACKET_REPEAT: 5381 case PACKET_FENCE: 5382 case PACKET_NOP: 5383 case PACKET_ARB_POINT: 5384 case PACKET_LOAD_AND_EXE: 5385 memcpy(kernel_pkt, user_pkt, pkt_size); 5386 cb_patched_cur_length += pkt_size; 5387 break; 5388 5389 default: 5390 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5391 pkt_id); 5392 rc = -EINVAL; 5393 break; 5394 } 5395 5396 if (rc) 5397 break; 5398 } 5399 5400 return rc; 5401 } 5402 5403 static int gaudi_parse_cb_mmu(struct 
hl_device *hdev, 5404 struct hl_cs_parser *parser) 5405 { 5406 u64 handle; 5407 u32 patched_cb_size; 5408 struct hl_cb *user_cb; 5409 int rc; 5410 5411 /* 5412 * The new CB should have space at the end for two MSG_PROT packets: 5413 * 1. Optional NOP padding for cacheline alignment 5414 * 2. A packet that will act as a completion packet 5415 * 3. A packet that will generate MSI interrupt 5416 */ 5417 if (parser->completion) 5418 parser->patched_cb_size = parser->user_cb_size + 5419 gaudi_get_patched_cb_extra_size(parser->user_cb_size); 5420 else 5421 parser->patched_cb_size = parser->user_cb_size; 5422 5423 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5424 parser->patched_cb_size, false, false, 5425 &handle); 5426 5427 if (rc) { 5428 dev_err(hdev->dev, 5429 "Failed to allocate patched CB for DMA CS %d\n", 5430 rc); 5431 return rc; 5432 } 5433 5434 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5435 /* hl_cb_get should never fail */ 5436 if (!parser->patched_cb) { 5437 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5438 rc = -EFAULT; 5439 goto out; 5440 } 5441 5442 /* 5443 * We are protected from overflow because the check 5444 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk() 5445 * in the common code. That check is done only if is_kernel_allocated_cb is true. 5446 * 5447 * There is no option to reach here without going through that check because: 5448 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to 5449 * an external queue. 5450 * 2. For Gaudi, we only parse CBs that were submitted to the external queues. 
5451 */ 5452 memcpy(parser->patched_cb->kernel_address, 5453 parser->user_cb->kernel_address, 5454 parser->user_cb_size); 5455 5456 patched_cb_size = parser->patched_cb_size; 5457 5458 /* Validate patched CB instead of user CB */ 5459 user_cb = parser->user_cb; 5460 parser->user_cb = parser->patched_cb; 5461 rc = gaudi_validate_cb(hdev, parser, true); 5462 parser->user_cb = user_cb; 5463 5464 if (rc) { 5465 hl_cb_put(parser->patched_cb); 5466 goto out; 5467 } 5468 5469 if (patched_cb_size != parser->patched_cb_size) { 5470 dev_err(hdev->dev, "user CB size mismatch\n"); 5471 hl_cb_put(parser->patched_cb); 5472 rc = -EINVAL; 5473 goto out; 5474 } 5475 5476 out: 5477 /* 5478 * Always call cb destroy here because we still have 1 reference 5479 * to it by calling cb_get earlier. After the job will be completed, 5480 * cb_put will release it, but here we want to remove it from the 5481 * idr 5482 */ 5483 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5484 5485 return rc; 5486 } 5487 5488 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 5489 struct hl_cs_parser *parser) 5490 { 5491 u64 handle; 5492 int rc; 5493 5494 rc = gaudi_validate_cb(hdev, parser, false); 5495 5496 if (rc) 5497 goto free_userptr; 5498 5499 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5500 parser->patched_cb_size, false, false, 5501 &handle); 5502 if (rc) { 5503 dev_err(hdev->dev, 5504 "Failed to allocate patched CB for DMA CS %d\n", rc); 5505 goto free_userptr; 5506 } 5507 5508 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5509 /* hl_cb_get should never fail here */ 5510 if (!parser->patched_cb) { 5511 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5512 rc = -EFAULT; 5513 goto out; 5514 } 5515 5516 rc = gaudi_patch_cb(hdev, parser); 5517 5518 if (rc) 5519 hl_cb_put(parser->patched_cb); 5520 5521 out: 5522 /* 5523 * Always call cb destroy here because we still have 1 reference 5524 * to it by calling cb_get earlier. 
After the job will be completed, 5525 * cb_put will release it, but here we want to remove it from the 5526 * idr 5527 */ 5528 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5529 5530 free_userptr: 5531 if (rc) 5532 hl_userptr_delete_list(hdev, parser->job_userptr_list); 5533 return rc; 5534 } 5535 5536 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 5537 struct hl_cs_parser *parser) 5538 { 5539 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5540 struct gaudi_device *gaudi = hdev->asic_specific; 5541 u32 nic_queue_offset, nic_mask_q_id; 5542 5543 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) && 5544 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) { 5545 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0; 5546 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2)); 5547 5548 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) { 5549 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5550 return -EINVAL; 5551 } 5552 } 5553 5554 /* For internal queue jobs just check if CB address is valid */ 5555 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5556 parser->user_cb_size, 5557 asic_prop->sram_user_base_address, 5558 asic_prop->sram_end_address)) 5559 return 0; 5560 5561 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5562 parser->user_cb_size, 5563 asic_prop->dram_user_base_address, 5564 asic_prop->dram_end_address)) 5565 return 0; 5566 5567 /* PMMU and HPMMU addresses are equal, check only one of them */ 5568 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5569 parser->user_cb_size, 5570 asic_prop->pmmu.start_addr, 5571 asic_prop->pmmu.end_addr)) 5572 return 0; 5573 5574 dev_err(hdev->dev, 5575 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 5576 parser->user_cb, parser->user_cb_size); 5577 5578 return -EFAULT; 5579 } 5580 5581 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5582 { 5583 struct 
gaudi_device *gaudi = hdev->asic_specific; 5584 5585 if (parser->queue_type == QUEUE_TYPE_INT) 5586 return gaudi_parse_cb_no_ext_queue(hdev, parser); 5587 5588 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 5589 return gaudi_parse_cb_mmu(hdev, parser); 5590 else 5591 return gaudi_parse_cb_no_mmu(hdev, parser); 5592 } 5593 5594 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, 5595 u32 len, u32 original_len, u64 cq_addr, u32 cq_val, 5596 u32 msi_vec, bool eb) 5597 { 5598 struct gaudi_device *gaudi = hdev->asic_specific; 5599 struct packet_msg_prot *cq_pkt; 5600 struct packet_nop *cq_padding; 5601 u64 msi_addr; 5602 u32 tmp; 5603 5604 cq_padding = kernel_address + original_len; 5605 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 5606 5607 while ((void *)cq_padding < (void *)cq_pkt) { 5608 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP)); 5609 cq_padding++; 5610 } 5611 5612 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5613 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5614 5615 if (eb) 5616 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5617 5618 cq_pkt->ctl = cpu_to_le32(tmp); 5619 cq_pkt->value = cpu_to_le32(cq_val); 5620 cq_pkt->addr = cpu_to_le64(cq_addr); 5621 5622 cq_pkt++; 5623 5624 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5625 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5626 cq_pkt->ctl = cpu_to_le32(tmp); 5627 cq_pkt->value = cpu_to_le32(1); 5628 5629 if (gaudi->multi_msi_mode) 5630 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4; 5631 else 5632 msi_addr = mmPCIE_CORE_MSI_REQ; 5633 5634 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); 5635 } 5636 5637 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 5638 { 5639 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 5640 } 5641 5642 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 5643 u32 size, u64 val) 5644 { 5645 struct packet_lin_dma *lin_dma_pkt; 5646 struct hl_cs_job 
*job; 5647 u32 cb_size, ctl, err_cause; 5648 struct hl_cb *cb; 5649 int rc; 5650 5651 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 5652 if (!cb) 5653 return -EFAULT; 5654 5655 lin_dma_pkt = cb->kernel_address; 5656 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 5657 cb_size = sizeof(*lin_dma_pkt); 5658 5659 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 5660 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 5661 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 5662 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5663 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5664 5665 lin_dma_pkt->ctl = cpu_to_le32(ctl); 5666 lin_dma_pkt->src_addr = cpu_to_le64(val); 5667 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 5668 lin_dma_pkt->tsize = cpu_to_le32(size); 5669 5670 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5671 if (!job) { 5672 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5673 rc = -ENOMEM; 5674 goto release_cb; 5675 } 5676 5677 /* Verify DMA is OK */ 5678 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5679 if (err_cause && !hdev->init_done) { 5680 dev_dbg(hdev->dev, 5681 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5682 err_cause); 5683 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5684 } 5685 5686 job->id = 0; 5687 job->user_cb = cb; 5688 atomic_inc(&job->user_cb->cs_cnt); 5689 job->user_cb_size = cb_size; 5690 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5691 job->patched_cb = job->user_cb; 5692 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 5693 5694 hl_debugfs_add_job(hdev, job); 5695 5696 rc = gaudi_send_job_on_qman0(hdev, job); 5697 hl_debugfs_remove_job(hdev, job); 5698 kfree(job); 5699 atomic_dec(&cb->cs_cnt); 5700 5701 /* Verify DMA is OK */ 5702 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5703 if (err_cause) { 5704 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5705 rc = -EIO; 5706 if (!hdev->init_done) { 5707 dev_dbg(hdev->dev, 5708 "Clearing DMA0 engine from errors (cause 0x%x)\n", 
5709 err_cause); 5710 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5711 } 5712 } 5713 5714 release_cb: 5715 hl_cb_put(cb); 5716 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5717 5718 return rc; 5719 } 5720 5721 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 5722 u32 num_regs, u32 val) 5723 { 5724 struct packet_msg_long *pkt; 5725 struct hl_cs_job *job; 5726 u32 cb_size, ctl; 5727 struct hl_cb *cb; 5728 int i, rc; 5729 5730 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot); 5731 5732 if (cb_size > SZ_2M) { 5733 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M); 5734 return -ENOMEM; 5735 } 5736 5737 cb = hl_cb_kernel_create(hdev, cb_size, false); 5738 if (!cb) 5739 return -EFAULT; 5740 5741 pkt = cb->kernel_address; 5742 5743 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ 5744 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); 5745 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5746 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5747 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5748 5749 for (i = 0; i < num_regs ; i++, pkt++) { 5750 pkt->ctl = cpu_to_le32(ctl); 5751 pkt->value = cpu_to_le32(val); 5752 pkt->addr = cpu_to_le64(reg_base + (i * 4)); 5753 } 5754 5755 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5756 if (!job) { 5757 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5758 rc = -ENOMEM; 5759 goto release_cb; 5760 } 5761 5762 job->id = 0; 5763 job->user_cb = cb; 5764 atomic_inc(&job->user_cb->cs_cnt); 5765 job->user_cb_size = cb_size; 5766 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5767 job->patched_cb = job->user_cb; 5768 job->job_cb_size = cb_size; 5769 5770 hl_debugfs_add_job(hdev, job); 5771 5772 rc = gaudi_send_job_on_qman0(hdev, job); 5773 hl_debugfs_remove_job(hdev, job); 5774 kfree(job); 5775 atomic_dec(&cb->cs_cnt); 5776 5777 release_cb: 5778 hl_cb_put(cb); 5779 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5780 5781 return rc; 5782 } 
5783 5784 static int gaudi_restore_sm_registers(struct hl_device *hdev) 5785 { 5786 u64 base_addr; 5787 u32 num_regs; 5788 int rc; 5789 5790 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5791 num_regs = NUM_OF_SOB_IN_BLOCK; 5792 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5793 if (rc) { 5794 dev_err(hdev->dev, "failed resetting SM registers"); 5795 return -ENOMEM; 5796 } 5797 5798 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0; 5799 num_regs = NUM_OF_SOB_IN_BLOCK; 5800 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5801 if (rc) { 5802 dev_err(hdev->dev, "failed resetting SM registers"); 5803 return -ENOMEM; 5804 } 5805 5806 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5807 num_regs = NUM_OF_SOB_IN_BLOCK; 5808 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5809 if (rc) { 5810 dev_err(hdev->dev, "failed resetting SM registers"); 5811 return -ENOMEM; 5812 } 5813 5814 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5815 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5816 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5817 if (rc) { 5818 dev_err(hdev->dev, "failed resetting SM registers"); 5819 return -ENOMEM; 5820 } 5821 5822 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0; 5823 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5824 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5825 if (rc) { 5826 dev_err(hdev->dev, "failed resetting SM registers"); 5827 return -ENOMEM; 5828 } 5829 5830 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5831 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5832 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5833 if (rc) { 5834 dev_err(hdev->dev, "failed resetting SM registers"); 5835 return -ENOMEM; 5836 } 5837 5838 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5839 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4); 5840 num_regs = NUM_OF_SOB_IN_BLOCK - 
GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT; 5841 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5842 if (rc) { 5843 dev_err(hdev->dev, "failed resetting SM registers"); 5844 return -ENOMEM; 5845 } 5846 5847 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + 5848 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4); 5849 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR; 5850 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5851 if (rc) { 5852 dev_err(hdev->dev, "failed resetting SM registers"); 5853 return -ENOMEM; 5854 } 5855 5856 return 0; 5857 } 5858 5859 static void gaudi_restore_dma_registers(struct hl_device *hdev) 5860 { 5861 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 5862 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5863 int i; 5864 5865 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5866 u64 sob_addr = CFG_BASE + 5867 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5868 (i * sob_delta); 5869 u32 dma_offset = i * DMA_CORE_OFFSET; 5870 5871 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, 5872 lower_32_bits(sob_addr)); 5873 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 5874 upper_32_bits(sob_addr)); 5875 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 5876 5877 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 5878 * modified by the user for SRAM reduction 5879 */ 5880 if (i > 1) 5881 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 5882 0x00000001); 5883 } 5884 } 5885 5886 static void gaudi_restore_qm_registers(struct hl_device *hdev) 5887 { 5888 u32 qman_offset; 5889 int i; 5890 5891 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5892 qman_offset = i * DMA_QMAN_OFFSET; 5893 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 5894 } 5895 5896 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 5897 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 5898 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 5899 } 5900 5901 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 5902 
qman_offset = i * TPC_QMAN_OFFSET; 5903 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 5904 } 5905 5906 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 5907 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET + 5908 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET; 5909 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0); 5910 } 5911 } 5912 5913 static int gaudi_restore_user_registers(struct hl_device *hdev) 5914 { 5915 int rc; 5916 5917 rc = gaudi_restore_sm_registers(hdev); 5918 if (rc) 5919 return rc; 5920 5921 gaudi_restore_dma_registers(hdev); 5922 gaudi_restore_qm_registers(hdev); 5923 5924 return 0; 5925 } 5926 5927 static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 5928 { 5929 return 0; 5930 } 5931 5932 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 5933 { 5934 u32 size = hdev->asic_prop.mmu_pgt_size + 5935 hdev->asic_prop.mmu_cache_mng_size; 5936 struct gaudi_device *gaudi = hdev->asic_specific; 5937 u64 addr = hdev->asic_prop.mmu_pgt_addr; 5938 5939 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 5940 return 0; 5941 5942 return gaudi_memset_device_memory(hdev, addr, size, 0); 5943 } 5944 5945 static void gaudi_restore_phase_topology(struct hl_device *hdev) 5946 { 5947 5948 } 5949 5950 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, 5951 u32 size_to_dma, dma_addr_t dma_addr) 5952 { 5953 u32 err_cause, val; 5954 u64 dma_offset; 5955 int rc; 5956 5957 dma_offset = dma_id * DMA_CORE_OFFSET; 5958 5959 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); 5960 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); 5961 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); 5962 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); 5963 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); 5964 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 5965 (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); 5966 5967 rc = hl_poll_timeout( 5968 hdev, 5969 mmDMA0_CORE_STS0 + dma_offset, 5970 val, 5971 
((val & DMA0_CORE_STS0_BUSY_MASK) == 0), 5972 0, 5973 1000000); 5974 5975 if (rc) { 5976 dev_err(hdev->dev, 5977 "DMA %d timed-out during reading of 0x%llx\n", 5978 dma_id, addr); 5979 return -EIO; 5980 } 5981 5982 /* Verify DMA is OK */ 5983 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5984 if (err_cause) { 5985 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5986 dev_dbg(hdev->dev, 5987 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5988 err_cause); 5989 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5990 5991 return -EIO; 5992 } 5993 5994 return 0; 5995 } 5996 5997 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, 5998 void *blob_addr) 5999 { 6000 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; 6001 u32 qm_glbl_sts0, qm_cgm_sts; 6002 u64 dma_offset, qm_offset; 6003 dma_addr_t dma_addr; 6004 void *kernel_addr; 6005 bool is_eng_idle; 6006 int rc = 0, dma_id; 6007 6008 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO); 6009 6010 if (!kernel_addr) 6011 return -ENOMEM; 6012 6013 hdev->asic_funcs->hw_queues_lock(hdev); 6014 6015 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 6016 dma_offset = dma_id * DMA_CORE_OFFSET; 6017 qm_offset = dma_id * DMA_QMAN_OFFSET; 6018 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 6019 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 6020 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 6021 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 6022 IS_DMA_IDLE(dma_core_sts0); 6023 6024 if (!is_eng_idle) { 6025 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 6026 dma_offset = dma_id * DMA_CORE_OFFSET; 6027 qm_offset = dma_id * DMA_QMAN_OFFSET; 6028 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 6029 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 6030 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 6031 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 6032 
IS_DMA_IDLE(dma_core_sts0); 6033 6034 if (!is_eng_idle) { 6035 dev_err_ratelimited(hdev->dev, 6036 "Can't read via DMA because it is BUSY\n"); 6037 rc = -EAGAIN; 6038 goto out; 6039 } 6040 } 6041 6042 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); 6043 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, 6044 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 6045 6046 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6047 * using the compute ctx ASID, if exists. If not, use the kernel ctx 6048 * ASID 6049 */ 6050 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6051 6052 /* Verify DMA is OK */ 6053 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6054 if (err_cause) { 6055 dev_dbg(hdev->dev, 6056 "Clearing DMA0 engine from errors (cause 0x%x)\n", 6057 err_cause); 6058 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 6059 } 6060 6061 pos = 0; 6062 size_left = size; 6063 size_to_dma = SZ_2M; 6064 6065 while (size_left > 0) { 6066 6067 if (size_left < SZ_2M) 6068 size_to_dma = size_left; 6069 6070 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, 6071 dma_addr); 6072 if (rc) 6073 break; 6074 6075 memcpy(blob_addr + pos, kernel_addr, size_to_dma); 6076 6077 if (size_left <= SZ_2M) 6078 break; 6079 6080 pos += SZ_2M; 6081 addr += SZ_2M; 6082 size_left -= SZ_2M; 6083 } 6084 6085 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6086 * using the compute ctx ASID, if exists. 
If not, use the kernel ctx 6087 * ASID 6088 */ 6089 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 6090 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6091 6092 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); 6093 6094 out: 6095 hdev->asic_funcs->hw_queues_unlock(hdev); 6096 6097 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); 6098 6099 return rc; 6100 } 6101 6102 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 6103 { 6104 struct gaudi_device *gaudi = hdev->asic_specific; 6105 6106 if (hdev->reset_info.hard_reset_pending) 6107 return U64_MAX; 6108 6109 return readq(hdev->pcie_bar[HBM_BAR_ID] + 6110 (addr - gaudi->hbm_bar_cur_addr)); 6111 } 6112 6113 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 6114 { 6115 struct gaudi_device *gaudi = hdev->asic_specific; 6116 6117 if (hdev->reset_info.hard_reset_pending) 6118 return; 6119 6120 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 6121 (addr - gaudi->hbm_bar_cur_addr)); 6122 } 6123 6124 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 6125 { 6126 /* mask to zero the MMBP and ASID bits */ 6127 WREG32_AND(reg, ~0x7FF); 6128 WREG32_OR(reg, asid); 6129 } 6130 6131 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 6132 { 6133 struct gaudi_device *gaudi = hdev->asic_specific; 6134 6135 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6136 return; 6137 6138 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 6139 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6140 return; 6141 } 6142 6143 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6144 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6145 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6146 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6147 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6148 6149 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6150 gaudi_mmu_prepare_reg(hdev, 
mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6151 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6152 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6153 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6154 6155 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6156 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6157 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6158 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6159 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6160 6161 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6162 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6163 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6164 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6165 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6166 6167 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6168 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6169 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6170 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6171 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6172 6173 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6174 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6175 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6176 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6177 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6178 6179 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6180 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6181 gaudi_mmu_prepare_reg(hdev, 
mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6182 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6183 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6184 6185 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6186 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6187 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6188 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6189 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6190 6191 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 6192 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 6193 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 6194 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 6195 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 6196 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 6197 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 6198 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 6199 6200 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6201 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6202 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6203 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6204 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6205 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 6206 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 6207 6208 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6209 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6210 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6211 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6212 gaudi_mmu_prepare_reg(hdev, 
mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6213 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 6214 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 6215 6216 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6217 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6218 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6219 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6220 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6221 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 6222 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 6223 6224 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6225 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6226 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6227 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6228 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6229 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 6230 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid); 6231 6232 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6233 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6234 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6235 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6236 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6237 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 6238 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 6239 6240 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6241 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6242 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6243 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6244 
gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6245 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 6246 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 6247 6248 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6249 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6250 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6251 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6252 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6253 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 6254 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 6255 6256 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6257 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6258 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6259 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6260 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6261 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 6262 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 6263 6264 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6265 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6266 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6267 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6268 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6269 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6270 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6271 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6272 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6273 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6274 6275 gaudi_mmu_prepare_reg(hdev, 
mmMME0_SBAB_ARUSER0, asid); 6276 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 6277 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 6278 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 6279 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 6280 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 6281 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 6282 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 6283 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 6284 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 6285 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 6286 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 6287 6288 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) { 6289 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, 6290 asid); 6291 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, 6292 asid); 6293 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, 6294 asid); 6295 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, 6296 asid); 6297 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, 6298 asid); 6299 } 6300 6301 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) { 6302 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, 6303 asid); 6304 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, 6305 asid); 6306 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, 6307 asid); 6308 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, 6309 asid); 6310 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, 6311 asid); 6312 } 6313 6314 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) { 6315 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, 6316 asid); 6317 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, 6318 asid); 6319 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, 6320 asid); 6321 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, 6322 asid); 
6323 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, 6324 asid); 6325 } 6326 6327 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) { 6328 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, 6329 asid); 6330 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, 6331 asid); 6332 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, 6333 asid); 6334 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, 6335 asid); 6336 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, 6337 asid); 6338 } 6339 6340 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) { 6341 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, 6342 asid); 6343 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, 6344 asid); 6345 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, 6346 asid); 6347 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, 6348 asid); 6349 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, 6350 asid); 6351 } 6352 6353 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) { 6354 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, 6355 asid); 6356 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, 6357 asid); 6358 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, 6359 asid); 6360 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, 6361 asid); 6362 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, 6363 asid); 6364 } 6365 6366 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) { 6367 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, 6368 asid); 6369 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, 6370 asid); 6371 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, 6372 asid); 6373 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, 6374 asid); 6375 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, 6376 asid); 6377 } 6378 6379 if 
(gaudi->hw_cap_initialized & HW_CAP_NIC7) { 6380 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, 6381 asid); 6382 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, 6383 asid); 6384 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, 6385 asid); 6386 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, 6387 asid); 6388 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, 6389 asid); 6390 } 6391 6392 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) { 6393 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, 6394 asid); 6395 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, 6396 asid); 6397 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, 6398 asid); 6399 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, 6400 asid); 6401 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, 6402 asid); 6403 } 6404 6405 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) { 6406 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, 6407 asid); 6408 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, 6409 asid); 6410 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, 6411 asid); 6412 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, 6413 asid); 6414 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, 6415 asid); 6416 } 6417 6418 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 6419 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 6420 } 6421 6422 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 6423 struct hl_cs_job *job) 6424 { 6425 struct packet_msg_prot *fence_pkt; 6426 u32 *fence_ptr; 6427 dma_addr_t fence_dma_addr; 6428 struct hl_cb *cb; 6429 u32 tmp, timeout, dma_offset; 6430 int rc; 6431 6432 if (hdev->pldm) 6433 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 6434 else 6435 timeout = HL_DEVICE_TIMEOUT_USEC; 6436 6437 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, 
GFP_KERNEL, &fence_dma_addr); 6438 if (!fence_ptr) { 6439 dev_err(hdev->dev, 6440 "Failed to allocate fence memory for QMAN0\n"); 6441 return -ENOMEM; 6442 } 6443 6444 cb = job->patched_cb; 6445 6446 fence_pkt = cb->kernel_address + 6447 job->job_cb_size - sizeof(struct packet_msg_prot); 6448 6449 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 6450 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 6451 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 6452 6453 fence_pkt->ctl = cpu_to_le32(tmp); 6454 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 6455 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 6456 6457 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 6458 6459 WREG32(mmDMA0_CORE_PROT + dma_offset, 6460 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6461 6462 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 6463 job->job_cb_size, cb->bus_address); 6464 if (rc) { 6465 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 6466 goto free_fence_ptr; 6467 } 6468 6469 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 6470 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 6471 timeout, true); 6472 6473 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 6474 6475 if (rc == -ETIMEDOUT) { 6476 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 6477 goto free_fence_ptr; 6478 } 6479 6480 free_fence_ptr: 6481 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); 6482 6483 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 6484 return rc; 6485 } 6486 6487 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 6488 { 6489 if (event_type >= GAUDI_EVENT_SIZE) 6490 goto event_not_supported; 6491 6492 if (!gaudi_irq_map_table[event_type].valid) 6493 goto event_not_supported; 6494 6495 snprintf(desc, size, gaudi_irq_map_table[event_type].name); 6496 6497 return; 6498 6499 event_not_supported: 6500 snprintf(desc, size, "N/A"); 6501 } 6502 6503 static const 
char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, 6504 bool is_write, u16 *engine_id_1, 6505 u16 *engine_id_2) 6506 { 6507 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 6508 6509 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 6510 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 6511 6512 switch (x_y) { 6513 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6514 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6515 dma_id[0] = 0; 6516 dma_id[1] = 2; 6517 break; 6518 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6519 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6520 dma_id[0] = 1; 6521 dma_id[1] = 3; 6522 break; 6523 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6524 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6525 dma_id[0] = 4; 6526 dma_id[1] = 6; 6527 break; 6528 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6529 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6530 dma_id[0] = 5; 6531 dma_id[1] = 7; 6532 break; 6533 default: 6534 goto unknown_initiator; 6535 } 6536 6537 for (i = 0 ; i < 2 ; i++) { 6538 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 6539 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6540 } 6541 6542 switch (x_y) { 6543 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6544 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6545 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6546 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6547 return "DMA0"; 6548 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6549 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; 6550 return "DMA2"; 6551 } else { 6552 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6553 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; 6554 return "DMA0 or DMA2"; 6555 } 6556 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6557 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6558 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6559 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6560 return "DMA1"; 6561 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6562 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; 6563 return "DMA3"; 6564 } else { 6565 
*engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6566 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; 6567 return "DMA1 or DMA3"; 6568 } 6569 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6570 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6571 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6572 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6573 return "DMA4"; 6574 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6575 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; 6576 return "DMA6"; 6577 } else { 6578 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6579 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; 6580 return "DMA4 or DMA6"; 6581 } 6582 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6583 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6584 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6585 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6586 return "DMA5"; 6587 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6588 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; 6589 return "DMA7"; 6590 } else { 6591 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6592 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; 6593 return "DMA5 or DMA7"; 6594 } 6595 } 6596 6597 unknown_initiator: 6598 return "unknown initiator"; 6599 } 6600 6601 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, 6602 u16 *engine_id_1, u16 *engine_id_2) 6603 { 6604 u32 val, x_y, axi_id; 6605 6606 val = is_write ? 
RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 6607 RREG32(mmMMU_UP_RAZWI_READ_ID); 6608 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 6609 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 6610 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 6611 RAZWI_INITIATOR_AXI_ID_SHIFT); 6612 6613 switch (x_y) { 6614 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 6615 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6616 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; 6617 return "TPC0"; 6618 } 6619 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6620 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; 6621 return "NIC0"; 6622 } 6623 break; 6624 case RAZWI_INITIATOR_ID_X_Y_TPC1: 6625 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; 6626 return "TPC1"; 6627 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 6628 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 6629 *engine_id_1 = GAUDI_ENGINE_ID_MME_0; 6630 return "MME0"; 6631 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 6632 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 6633 *engine_id_1 = GAUDI_ENGINE_ID_MME_1; 6634 return "MME1"; 6635 case RAZWI_INITIATOR_ID_X_Y_TPC2: 6636 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; 6637 return "TPC2"; 6638 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 6639 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6640 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; 6641 return "TPC3"; 6642 } 6643 /* PCI, CPU or PSOC does not have engine id*/ 6644 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 6645 return "PCI"; 6646 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 6647 return "CPU"; 6648 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 6649 return "PSOC"; 6650 break; 6651 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6652 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6653 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6654 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6655 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6656 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6657 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6658 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6659 return 
gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, 6660 engine_id_1, engine_id_2); 6661 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 6662 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6663 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; 6664 return "TPC4"; 6665 } 6666 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6667 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; 6668 return "NIC1"; 6669 } 6670 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6671 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; 6672 return "NIC2"; 6673 } 6674 break; 6675 case RAZWI_INITIATOR_ID_X_Y_TPC5: 6676 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; 6677 return "TPC5"; 6678 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 6679 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 6680 *engine_id_1 = GAUDI_ENGINE_ID_MME_2; 6681 return "MME2"; 6682 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 6683 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 6684 *engine_id_1 = GAUDI_ENGINE_ID_MME_3; 6685 return "MME3"; 6686 case RAZWI_INITIATOR_ID_X_Y_TPC6: 6687 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; 6688 return "TPC6"; 6689 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 6690 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6691 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; 6692 return "TPC7"; 6693 } 6694 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6695 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; 6696 return "NIC4"; 6697 } 6698 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6699 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; 6700 return "NIC5"; 6701 } 6702 break; 6703 default: 6704 break; 6705 } 6706 6707 dev_err(hdev->dev, 6708 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 6709 val, 6710 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 6711 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 6712 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 6713 RAZWI_INITIATOR_AXI_ID_MASK); 6714 6715 return "unknown initiator"; 6716 } 6717 6718 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1, 6719 u16 *engine_id_2, 
bool *is_read, bool *is_write) 6720 { 6721 6722 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 6723 dev_err_ratelimited(hdev->dev, 6724 "RAZWI event caused by illegal write of %s\n", 6725 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); 6726 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 6727 *is_write = true; 6728 } 6729 6730 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 6731 dev_err_ratelimited(hdev->dev, 6732 "RAZWI event caused by illegal read of %s\n", 6733 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); 6734 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 6735 *is_read = true; 6736 } 6737 } 6738 6739 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) 6740 { 6741 struct gaudi_device *gaudi = hdev->asic_specific; 6742 u32 val; 6743 6744 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6745 return; 6746 6747 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 6748 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6749 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 6750 *addr <<= 32; 6751 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 6752 6753 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); 6754 hl_handle_page_fault(hdev, *addr, 0, true, event_mask); 6755 6756 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 6757 } 6758 6759 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 6760 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6761 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 6762 *addr <<= 32; 6763 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 6764 6765 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); 6766 6767 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 6768 } 6769 } 6770 6771 /* 6772 * +-------------------+------------------------------------------------------+ 6773 * | Configuration Reg | Description | 6774 * | Address | | 6775 * +-------------------+------------------------------------------------------+ 6776 * | 0xF30 - 0xF3F |ECC 
single error indication (1 bit per memory wrapper)| 6777 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 6778 * | |0xF34 memory wrappers 63:32 | 6779 * | |0xF38 memory wrappers 95:64 | 6780 * | |0xF3C memory wrappers 127:96 | 6781 * +-------------------+------------------------------------------------------+ 6782 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 6783 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 6784 * | |0xF44 memory wrappers 63:32 | 6785 * | |0xF48 memory wrappers 95:64 | 6786 * | |0xF4C memory wrappers 127:96 | 6787 * +-------------------+------------------------------------------------------+ 6788 */ 6789 static int gaudi_extract_ecc_info(struct hl_device *hdev, 6790 struct ecc_info_extract_params *params, u64 *ecc_address, 6791 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 6792 { 6793 u32 i, num_mem_regs, reg, err_bit; 6794 u64 err_addr, err_word = 0; 6795 6796 num_mem_regs = params->num_memories / 32 + 6797 ((params->num_memories % 32) ? 1 : 0); 6798 6799 if (params->block_address >= CFG_BASE) 6800 params->block_address -= CFG_BASE; 6801 6802 if (params->derr) 6803 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 6804 else 6805 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 6806 6807 /* Set invalid wrapper index */ 6808 *memory_wrapper_idx = 0xFF; 6809 6810 /* Iterate through memory wrappers, a single bit must be set */ 6811 for (i = 0 ; i < num_mem_regs ; i++) { 6812 err_addr += i * 4; 6813 err_word = RREG32(err_addr); 6814 if (err_word) { 6815 err_bit = __ffs(err_word); 6816 *memory_wrapper_idx = err_bit + (32 * i); 6817 break; 6818 } 6819 } 6820 6821 if (*memory_wrapper_idx == 0xFF) { 6822 dev_err(hdev->dev, "ECC error information cannot be found\n"); 6823 return -EINVAL; 6824 } 6825 6826 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 6827 *memory_wrapper_idx); 6828 6829 *ecc_address = 6830 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 6831 *ecc_syndrom = 6832 
RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 6833 6834 /* Clear error indication */ 6835 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 6836 if (params->derr) 6837 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 6838 else 6839 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 6840 6841 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 6842 6843 return 0; 6844 } 6845 6846 /* 6847 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6848 * 6849 * @idx: the current pi/ci value 6850 * @q_len: the queue length (power of 2) 6851 * 6852 * @return the cyclically decremented index 6853 */ 6854 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) 6855 { 6856 u32 mask = q_len - 1; 6857 6858 /* 6859 * modular decrement is equivalent to adding (queue_size -1) 6860 * later we take LSBs to make sure the value is in the 6861 * range [0, queue_len - 1] 6862 */ 6863 return (idx + q_len - 1) & mask; 6864 } 6865 6866 /** 6867 * gaudi_handle_sw_config_stream_data - print SW config stream data 6868 * 6869 * @hdev: pointer to the habanalabs device structure 6870 * @stream: the QMAN's stream 6871 * @qman_base: base address of QMAN registers block 6872 * @event_mask: mask of the last events occurred 6873 */ 6874 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream, 6875 u64 qman_base, u64 event_mask) 6876 { 6877 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6878 u32 cq_ptr_lo_off, size; 6879 6880 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0; 6881 6882 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 6883 stream * cq_ptr_lo_off; 6884 cq_ptr_hi = cq_ptr_lo + 6885 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0); 6886 cq_tsize = cq_ptr_lo + 6887 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0); 6888 6889 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6890 size = RREG32(cq_tsize); 6891 dev_info(hdev->dev, "stop on err: stream: %u, 
addr: %#llx, size: %u\n", 6892 stream, cq_ptr, size); 6893 6894 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6895 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; 6896 hdev->captured_err_info.undef_opcode.cq_size = size; 6897 hdev->captured_err_info.undef_opcode.stream_id = stream; 6898 } 6899 } 6900 6901 /** 6902 * gaudi_handle_last_pqes_on_err - print last PQEs on error 6903 * 6904 * @hdev: pointer to the habanalabs device structure 6905 * @qid_base: first QID of the QMAN (out of 4 streams) 6906 * @stream: the QMAN's stream 6907 * @qman_base: base address of QMAN registers block 6908 * @event_mask: mask of the last events occurred 6909 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 6910 */ 6911 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, 6912 u32 stream, u64 qman_base, 6913 u64 event_mask, 6914 bool pr_sw_conf) 6915 { 6916 u32 ci, qm_ci_stream_off, queue_len; 6917 struct hl_hw_queue *q; 6918 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE]; 6919 int i; 6920 6921 q = &hdev->kernel_queues[qid_base + stream]; 6922 6923 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0; 6924 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) + 6925 stream * qm_ci_stream_off; 6926 6927 queue_len = (q->queue_type == QUEUE_TYPE_INT) ? 
6928 q->int_queue_len : HL_QUEUE_LENGTH; 6929 6930 hdev->asic_funcs->hw_queues_lock(hdev); 6931 6932 if (pr_sw_conf) 6933 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6934 6935 ci = RREG32(pq_ci); 6936 6937 /* we should start printing form ci -1 */ 6938 ci = gaudi_queue_idx_dec(ci, queue_len); 6939 memset(addr, 0, sizeof(addr)); 6940 6941 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6942 struct hl_bd *bd; 6943 u32 len; 6944 6945 bd = q->kernel_address; 6946 bd += ci; 6947 6948 len = le32_to_cpu(bd->len); 6949 /* len 0 means uninitialized entry- break */ 6950 if (!len) 6951 break; 6952 6953 addr[i] = le64_to_cpu(bd->ptr); 6954 6955 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", 6956 stream, ci, addr[i], len); 6957 6958 /* get previous ci, wrap if needed */ 6959 ci = gaudi_queue_idx_dec(ci, queue_len); 6960 } 6961 6962 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6963 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; 6964 u32 arr_idx = undef_opcode->cb_addr_streams_len; 6965 6966 if (arr_idx == 0) { 6967 undef_opcode->timestamp = ktime_get(); 6968 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base]; 6969 } 6970 6971 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr)); 6972 undef_opcode->cb_addr_streams_len++; 6973 } 6974 6975 hdev->asic_funcs->hw_queues_unlock(hdev); 6976 } 6977 6978 /** 6979 * handle_qman_data_on_err - extract QMAN data on error 6980 * 6981 * @hdev: pointer to the habanalabs device structure 6982 * @qid_base: first QID of the QMAN (out of 4 streams) 6983 * @stream: the QMAN's stream 6984 * @qman_base: base address of QMAN registers block 6985 * @event_mask: mask of the last events occurred 6986 * 6987 * This function attempt to exatract as much data as possible on QMAN error. 6988 * On upper CP print the SW config stream data and last 8 PQEs. 
6989 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 6990 */ 6991 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base, 6992 u32 stream, u64 qman_base, u64 event_mask) 6993 { 6994 u32 i; 6995 6996 if (stream != QMAN_STREAMS) { 6997 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, 6998 qman_base, event_mask, true); 6999 return; 7000 } 7001 7002 /* handle Lower-CP */ 7003 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 7004 7005 for (i = 0; i < QMAN_STREAMS; i++) 7006 gaudi_handle_last_pqes_on_err(hdev, qid_base, i, 7007 qman_base, event_mask, false); 7008 } 7009 7010 static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 7011 const char *qm_name, 7012 u64 qman_base, 7013 u32 qid_base, 7014 u64 *event_mask) 7015 { 7016 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 7017 u64 glbl_sts_addr, arb_err_addr; 7018 char reg_desc[32]; 7019 7020 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE); 7021 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE); 7022 7023 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 7024 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 7025 glbl_sts_clr_val = 0; 7026 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 7027 7028 if (!glbl_sts_val) 7029 continue; 7030 7031 if (i == QMAN_STREAMS) 7032 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 7033 else 7034 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 7035 7036 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 7037 if (glbl_sts_val & BIT(j)) { 7038 dev_err_ratelimited(hdev->dev, 7039 "%s %s. 
err cause: %s\n", 7040 qm_name, reg_desc, 7041 gaudi_qman_error_cause[j]); 7042 glbl_sts_clr_val |= BIT(j); 7043 } 7044 } 7045 /* check for undefined opcode */ 7046 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && 7047 hdev->captured_err_info.undef_opcode.write_enable) { 7048 memset(&hdev->captured_err_info.undef_opcode, 0, 7049 sizeof(hdev->captured_err_info.undef_opcode)); 7050 7051 hdev->captured_err_info.undef_opcode.write_enable = false; 7052 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; 7053 } 7054 7055 /* Write 1 clear errors */ 7056 if (!hdev->stop_on_err) 7057 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 7058 else 7059 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask); 7060 } 7061 7062 arb_err_val = RREG32(arb_err_addr); 7063 7064 if (!arb_err_val) 7065 return; 7066 7067 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 7068 if (arb_err_val & BIT(j)) { 7069 dev_err_ratelimited(hdev->dev, 7070 "%s ARB_ERR. err cause: %s\n", 7071 qm_name, 7072 gaudi_qman_arb_error_cause[j]); 7073 } 7074 } 7075 } 7076 7077 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, 7078 struct hl_eq_sm_sei_data *sei_data) 7079 { 7080 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; 7081 7082 /* Flip the bits as the enum is ordered in the opposite way */ 7083 index = (index ^ 0x3) & 0x3; 7084 7085 switch (sei_data->sei_cause) { 7086 case SM_SEI_SO_OVERFLOW: 7087 dev_err_ratelimited(hdev->dev, 7088 "%s SEI Error: SOB Group %u overflow/underflow", 7089 gaudi_sync_manager_names[index], 7090 le32_to_cpu(sei_data->sei_log)); 7091 break; 7092 case SM_SEI_LBW_4B_UNALIGNED: 7093 dev_err_ratelimited(hdev->dev, 7094 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", 7095 gaudi_sync_manager_names[index], 7096 le32_to_cpu(sei_data->sei_log)); 7097 break; 7098 case SM_SEI_AXI_RESPONSE_ERR: 7099 dev_err_ratelimited(hdev->dev, 7100 "%s SEI Error: AXI ID %u response error", 7101 
gaudi_sync_manager_names[index], 7102 le32_to_cpu(sei_data->sei_log)); 7103 break; 7104 default: 7105 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", 7106 le32_to_cpu(sei_data->sei_log)); 7107 break; 7108 } 7109 } 7110 7111 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7112 struct hl_eq_ecc_data *ecc_data) 7113 { 7114 struct ecc_info_extract_params params; 7115 u64 ecc_address = 0, ecc_syndrom = 0; 7116 u8 index, memory_wrapper_idx = 0; 7117 bool extract_info_from_fw; 7118 int rc; 7119 7120 if (hdev->asic_prop.fw_security_enabled) { 7121 extract_info_from_fw = true; 7122 goto extract_ecc_info; 7123 } 7124 7125 switch (event_type) { 7126 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 7127 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: 7128 extract_info_from_fw = true; 7129 break; 7130 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7131 index = event_type - GAUDI_EVENT_TPC0_SERR; 7132 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7133 params.num_memories = 90; 7134 params.derr = false; 7135 extract_info_from_fw = false; 7136 break; 7137 case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: 7138 index = event_type - GAUDI_EVENT_TPC0_DERR; 7139 params.block_address = 7140 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7141 params.num_memories = 90; 7142 params.derr = true; 7143 extract_info_from_fw = false; 7144 break; 7145 case GAUDI_EVENT_MME0_ACC_SERR: 7146 case GAUDI_EVENT_MME1_ACC_SERR: 7147 case GAUDI_EVENT_MME2_ACC_SERR: 7148 case GAUDI_EVENT_MME3_ACC_SERR: 7149 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; 7150 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7151 params.num_memories = 128; 7152 params.derr = false; 7153 extract_info_from_fw = false; 7154 break; 7155 case GAUDI_EVENT_MME0_ACC_DERR: 7156 case GAUDI_EVENT_MME1_ACC_DERR: 7157 case GAUDI_EVENT_MME2_ACC_DERR: 7158 case GAUDI_EVENT_MME3_ACC_DERR: 7159 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; 7160 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7161 params.num_memories = 128; 7162 params.derr = true; 7163 extract_info_from_fw = false; 7164 break; 7165 case GAUDI_EVENT_MME0_SBAB_SERR: 7166 case GAUDI_EVENT_MME1_SBAB_SERR: 7167 case GAUDI_EVENT_MME2_SBAB_SERR: 7168 case GAUDI_EVENT_MME3_SBAB_SERR: 7169 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; 7170 params.block_address = 7171 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7172 params.num_memories = 33; 7173 params.derr = false; 7174 extract_info_from_fw = false; 7175 break; 7176 case GAUDI_EVENT_MME0_SBAB_DERR: 7177 case GAUDI_EVENT_MME1_SBAB_DERR: 7178 case GAUDI_EVENT_MME2_SBAB_DERR: 7179 case GAUDI_EVENT_MME3_SBAB_DERR: 7180 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; 7181 params.block_address = 7182 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7183 params.num_memories = 33; 7184 params.derr = true; 7185 extract_info_from_fw = false; 7186 break; 7187 default: 7188 return; 7189 } 7190 7191 extract_ecc_info: 7192 if (extract_info_from_fw) { 7193 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7194 ecc_syndrom = 
le64_to_cpu(ecc_data->ecc_syndrom); 7195 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7196 } else { 7197 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address, 7198 &ecc_syndrom, &memory_wrapper_idx); 7199 if (rc) 7200 return; 7201 } 7202 7203 dev_err(hdev->dev, 7204 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n", 7205 ecc_address, ecc_syndrom, memory_wrapper_idx); 7206 } 7207 7208 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7209 { 7210 u64 qman_base; 7211 char desc[32]; 7212 u32 qid_base; 7213 u8 index; 7214 7215 switch (event_type) { 7216 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7217 index = event_type - GAUDI_EVENT_TPC0_QM; 7218 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7219 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; 7220 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); 7221 break; 7222 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7223 if (event_type == GAUDI_EVENT_MME0_QM) { 7224 index = 0; 7225 qid_base = GAUDI_QUEUE_ID_MME_0_0; 7226 } else { /* event_type == GAUDI_EVENT_MME2_QM */ 7227 index = 2; 7228 qid_base = GAUDI_QUEUE_ID_MME_1_0; 7229 } 7230 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET; 7231 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); 7232 break; 7233 case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: 7234 index = event_type - GAUDI_EVENT_DMA0_QM; 7235 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS; 7236 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */ 7237 if (index > 1) 7238 qid_base++; 7239 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET; 7240 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); 7241 break; 7242 case GAUDI_EVENT_NIC0_QM0: 7243 qid_base = GAUDI_QUEUE_ID_NIC_0_0; 7244 qman_base = mmNIC0_QM0_BASE; 7245 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0"); 7246 break; 7247 case GAUDI_EVENT_NIC0_QM1: 7248 qid_base = GAUDI_QUEUE_ID_NIC_1_0; 7249 qman_base = mmNIC0_QM1_BASE; 7250 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1"); 7251 break; 7252 case GAUDI_EVENT_NIC1_QM0: 7253 qid_base = GAUDI_QUEUE_ID_NIC_2_0; 7254 qman_base = mmNIC1_QM0_BASE; 7255 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0"); 7256 break; 7257 case GAUDI_EVENT_NIC1_QM1: 7258 qid_base = GAUDI_QUEUE_ID_NIC_3_0; 7259 qman_base = mmNIC1_QM1_BASE; 7260 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1"); 7261 break; 7262 case GAUDI_EVENT_NIC2_QM0: 7263 qid_base = GAUDI_QUEUE_ID_NIC_4_0; 7264 qman_base = mmNIC2_QM0_BASE; 7265 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0"); 7266 break; 7267 case GAUDI_EVENT_NIC2_QM1: 7268 qid_base = GAUDI_QUEUE_ID_NIC_5_0; 7269 qman_base = mmNIC2_QM1_BASE; 7270 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1"); 7271 break; 7272 case GAUDI_EVENT_NIC3_QM0: 7273 qid_base = GAUDI_QUEUE_ID_NIC_6_0; 7274 qman_base = mmNIC3_QM0_BASE; 7275 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0"); 7276 break; 7277 case GAUDI_EVENT_NIC3_QM1: 7278 qid_base = GAUDI_QUEUE_ID_NIC_7_0; 7279 qman_base = mmNIC3_QM1_BASE; 7280 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1"); 7281 break; 7282 case GAUDI_EVENT_NIC4_QM0: 7283 qid_base = GAUDI_QUEUE_ID_NIC_8_0; 7284 qman_base = mmNIC4_QM0_BASE; 7285 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0"); 7286 break; 7287 case GAUDI_EVENT_NIC4_QM1: 7288 qid_base = GAUDI_QUEUE_ID_NIC_9_0; 7289 qman_base = mmNIC4_QM1_BASE; 7290 
snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1"); 7291 break; 7292 default: 7293 return; 7294 } 7295 7296 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask); 7297 } 7298 7299 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, 7300 bool razwi, u64 *event_mask) 7301 { 7302 bool is_read = false, is_write = false; 7303 u16 engine_id[2], num_of_razwi_eng = 0; 7304 char desc[64] = ""; 7305 u64 razwi_addr = 0; 7306 u8 razwi_flags = 0; 7307 7308 /* 7309 * Init engine id by default as not valid and only if razwi initiated from engine with 7310 * engine id it will get valid value. 7311 */ 7312 engine_id[0] = HL_RAZWI_NA_ENG_ID; 7313 engine_id[1] = HL_RAZWI_NA_ENG_ID; 7314 7315 gaudi_get_event_desc(event_type, desc, sizeof(desc)); 7316 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7317 event_type, desc); 7318 7319 if (razwi) { 7320 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, 7321 &is_write); 7322 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); 7323 7324 if (is_read) 7325 razwi_flags |= HL_RAZWI_READ; 7326 if (is_write) 7327 razwi_flags |= HL_RAZWI_WRITE; 7328 7329 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) { 7330 if (engine_id[1] != HL_RAZWI_NA_ENG_ID) 7331 num_of_razwi_eng = 2; 7332 else 7333 num_of_razwi_eng = 1; 7334 } 7335 7336 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags, 7337 event_mask); 7338 } 7339 } 7340 7341 static void gaudi_print_out_of_sync_info(struct hl_device *hdev, 7342 struct cpucp_pkt_sync_err *sync_err) 7343 { 7344 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 7345 7346 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 7347 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 7348 } 7349 7350 static void gaudi_print_fw_alive_info(struct hl_device *hdev, 7351 struct hl_eq_fw_alive *fw_alive) 7352 { 7353 dev_err(hdev->dev, 7354 "FW 
alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", 7355 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical", 7356 le32_to_cpu(fw_alive->process_id), 7357 le32_to_cpu(fw_alive->thread_id), 7358 le64_to_cpu(fw_alive->uptime_seconds)); 7359 } 7360 7361 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, 7362 void *data) 7363 { 7364 char desc[64] = "", *type; 7365 struct eq_nic_sei_event *eq_nic_sei = data; 7366 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; 7367 7368 switch (eq_nic_sei->axi_error_cause) { 7369 case RXB: 7370 type = "RXB"; 7371 break; 7372 case RXE: 7373 type = "RXE"; 7374 break; 7375 case TXS: 7376 type = "TXS"; 7377 break; 7378 case TXE: 7379 type = "TXE"; 7380 break; 7381 case QPC_RESP: 7382 type = "QPC_RESP"; 7383 break; 7384 case NON_AXI_ERR: 7385 type = "NON_AXI_ERR"; 7386 break; 7387 case TMR: 7388 type = "TMR"; 7389 break; 7390 default: 7391 dev_err(hdev->dev, "unknown NIC AXI cause %d\n", 7392 eq_nic_sei->axi_error_cause); 7393 type = "N/A"; 7394 break; 7395 } 7396 7397 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, 7398 eq_nic_sei->id); 7399 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7400 event_type, desc); 7401 } 7402 7403 static int gaudi_compute_reset_late_init(struct hl_device *hdev) 7404 { 7405 /* GAUDI doesn't support any reset except hard-reset */ 7406 return -EPERM; 7407 } 7408 7409 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, 7410 struct hl_eq_hbm_ecc_data *hbm_ecc_data) 7411 { 7412 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7413 int rc = 0; 7414 7415 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 7416 CPU_BOOT_DEV_STS0_HBM_ECC_EN) { 7417 if (!hbm_ecc_data) { 7418 dev_err(hdev->dev, "No FW ECC data"); 7419 return 0; 7420 } 7421 7422 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK, 7423 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7424 rd_par = 
FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK, 7425 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7426 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK, 7427 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7428 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK, 7429 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7430 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK, 7431 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7432 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK, 7433 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7434 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK, 7435 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7436 7437 dev_err(hdev->dev, 7438 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7439 device, ch, wr_par, rd_par, ca_par, serr, derr); 7440 dev_err(hdev->dev, 7441 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7442 device, ch, hbm_ecc_data->first_addr, type, 7443 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7444 hbm_ecc_data->dec_cnt); 7445 return 0; 7446 } 7447 7448 if (hdev->asic_prop.fw_security_enabled) { 7449 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7450 return 0; 7451 } 7452 7453 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; 7454 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { 7455 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); 7456 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7457 if (val) { 7458 rc = -EIO; 7459 dev_err(hdev->dev, 7460 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7461 device, ch * 2, val & 0x1, (val >> 1) & 0x1, 7462 (val >> 2) & 0x1, (val >> 3) & 0x1, 7463 (val >> 4) & 0x1); 7464 7465 val2 = RREG32(base + ch * 0x1000 + 0x060); 7466 dev_err(hdev->dev, 7467 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7468 device, ch * 2, 7469 RREG32(base + ch * 0x1000 + 0x064), 7470 (val2 & 0x200) >> 
9, (val2 & 0xFC00) >> 10, 7471 (val2 & 0xFF0000) >> 16, 7472 (val2 & 0xFF000000) >> 24); 7473 } 7474 7475 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); 7476 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7477 if (val) { 7478 rc = -EIO; 7479 dev_err(hdev->dev, 7480 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7481 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, 7482 (val >> 2) & 0x1, (val >> 3) & 0x1, 7483 (val >> 4) & 0x1); 7484 7485 val2 = RREG32(base + ch * 0x1000 + 0x070); 7486 dev_err(hdev->dev, 7487 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7488 device, ch * 2 + 1, 7489 RREG32(base + ch * 0x1000 + 0x074), 7490 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7491 (val2 & 0xFF0000) >> 16, 7492 (val2 & 0xFF000000) >> 24); 7493 } 7494 7495 /* Clear interrupts */ 7496 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); 7497 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); 7498 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); 7499 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); 7500 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); 7501 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); 7502 } 7503 7504 val = RREG32(base + 0x8F30); 7505 val2 = RREG32(base + 0x8F34); 7506 if (val | val2) { 7507 rc = -EIO; 7508 dev_err(hdev->dev, 7509 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", 7510 device, val, val2); 7511 } 7512 val = RREG32(base + 0x8F40); 7513 val2 = RREG32(base + 0x8F44); 7514 if (val | val2) { 7515 rc = -EIO; 7516 dev_err(hdev->dev, 7517 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", 7518 device, val, val2); 7519 } 7520 7521 return rc; 7522 } 7523 7524 static int gaudi_hbm_event_to_dev(u16 hbm_event_type) 7525 { 7526 switch (hbm_event_type) { 7527 case GAUDI_EVENT_HBM0_SPI_0: 7528 case GAUDI_EVENT_HBM0_SPI_1: 7529 return 0; 7530 case GAUDI_EVENT_HBM1_SPI_0: 7531 case GAUDI_EVENT_HBM1_SPI_1: 7532 return 1; 7533 case 
GAUDI_EVENT_HBM2_SPI_0: 7534 case GAUDI_EVENT_HBM2_SPI_1: 7535 return 2; 7536 case GAUDI_EVENT_HBM3_SPI_0: 7537 case GAUDI_EVENT_HBM3_SPI_1: 7538 return 3; 7539 default: 7540 break; 7541 } 7542 7543 /* Should never happen */ 7544 return 0; 7545 } 7546 7547 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, 7548 char *interrupt_name) 7549 { 7550 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; 7551 bool soft_reset_required = false; 7552 7553 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & 7554 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; 7555 7556 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++) 7557 if (tpc_interrupts_cause & BIT(i)) { 7558 dev_err_ratelimited(hdev->dev, 7559 "TPC%d_%s interrupt cause: %s\n", 7560 tpc_id, interrupt_name, 7561 gaudi_tpc_interrupts_cause[i]); 7562 /* If this is QM error, we need to soft-reset */ 7563 if (i == 15) 7564 soft_reset_required = true; 7565 } 7566 7567 /* Clear interrupts */ 7568 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); 7569 7570 return soft_reset_required; 7571 } 7572 7573 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type) 7574 { 7575 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1; 7576 } 7577 7578 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type) 7579 { 7580 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6; 7581 } 7582 7583 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7584 { 7585 ktime_t zero_time = ktime_set(0, 0); 7586 7587 mutex_lock(&hdev->clk_throttling.lock); 7588 7589 switch (event_type) { 7590 case GAUDI_EVENT_FIX_POWER_ENV_S: 7591 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 7592 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 7593 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 7594 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 7595 dev_info_ratelimited(hdev->dev, 
7596 "Clock throttling due to power consumption\n"); 7597 break; 7598 7599 case GAUDI_EVENT_FIX_POWER_ENV_E: 7600 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 7601 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 7602 dev_info_ratelimited(hdev->dev, 7603 "Power envelop is safe, back to optimal clock\n"); 7604 break; 7605 7606 case GAUDI_EVENT_FIX_THERMAL_ENV_S: 7607 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 7608 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 7609 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 7610 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 7611 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7612 dev_info_ratelimited(hdev->dev, 7613 "Clock throttling due to overheating\n"); 7614 break; 7615 7616 case GAUDI_EVENT_FIX_THERMAL_ENV_E: 7617 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 7618 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 7619 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7620 dev_info_ratelimited(hdev->dev, 7621 "Thermal envelop is safe, back to optimal clock\n"); 7622 break; 7623 7624 default: 7625 dev_err(hdev->dev, "Received invalid clock change event %d\n", 7626 event_type); 7627 break; 7628 } 7629 7630 mutex_unlock(&hdev->clk_throttling.lock); 7631 } 7632 7633 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 7634 { 7635 struct gaudi_device *gaudi = hdev->asic_specific; 7636 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0; 7637 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); 7638 u32 fw_fatal_err_flag = 0, flags = 0; 7639 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) 7640 >> EQ_CTL_EVENT_TYPE_SHIFT); 7641 bool reset_required, reset_direct = false; 7642 u8 cause; 7643 int rc; 7644 7645 if (event_type >= GAUDI_EVENT_SIZE) { 7646 dev_err(hdev->dev, "Event type %u exceeds maximum of 
%u", 7647 event_type, GAUDI_EVENT_SIZE - 1); 7648 return; 7649 } 7650 7651 gaudi->events_stat[event_type]++; 7652 gaudi->events_stat_aggregate[event_type]++; 7653 7654 switch (event_type) { 7655 case GAUDI_EVENT_PCIE_CORE_DERR: 7656 case GAUDI_EVENT_PCIE_IF_DERR: 7657 case GAUDI_EVENT_PCIE_PHY_DERR: 7658 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: 7659 case GAUDI_EVENT_MME0_ACC_DERR: 7660 case GAUDI_EVENT_MME0_SBAB_DERR: 7661 case GAUDI_EVENT_MME1_ACC_DERR: 7662 case GAUDI_EVENT_MME1_SBAB_DERR: 7663 case GAUDI_EVENT_MME2_ACC_DERR: 7664 case GAUDI_EVENT_MME2_SBAB_DERR: 7665 case GAUDI_EVENT_MME3_ACC_DERR: 7666 case GAUDI_EVENT_MME3_SBAB_DERR: 7667 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: 7668 fallthrough; 7669 case GAUDI_EVENT_CPU_IF_ECC_DERR: 7670 case GAUDI_EVENT_PSOC_MEM_DERR: 7671 case GAUDI_EVENT_PSOC_CORESIGHT_DERR: 7672 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: 7673 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: 7674 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: 7675 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: 7676 case GAUDI_EVENT_MMU_DERR: 7677 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7678 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7679 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7680 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7681 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7682 goto reset_device; 7683 7684 case GAUDI_EVENT_GIC500: 7685 case GAUDI_EVENT_AXI_ECC: 7686 case GAUDI_EVENT_L2_RAM_ECC: 7687 case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: 7688 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7689 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7690 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7691 goto reset_device; 7692 7693 case GAUDI_EVENT_HBM0_SPI_0: 7694 case GAUDI_EVENT_HBM1_SPI_0: 7695 case GAUDI_EVENT_HBM2_SPI_0: 7696 case GAUDI_EVENT_HBM3_SPI_0: 7697 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7698 gaudi_hbm_read_interrupts(hdev, 7699 gaudi_hbm_event_to_dev(event_type), 7700 &eq_entry->hbm_ecc_data); 7701 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7702 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7703 goto reset_device; 7704 7705 case GAUDI_EVENT_HBM0_SPI_1: 7706 case GAUDI_EVENT_HBM1_SPI_1: 7707 case GAUDI_EVENT_HBM2_SPI_1: 7708 case GAUDI_EVENT_HBM3_SPI_1: 7709 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7710 gaudi_hbm_read_interrupts(hdev, 7711 gaudi_hbm_event_to_dev(event_type), 7712 &eq_entry->hbm_ecc_data); 7713 hl_fw_unmask_irq(hdev, event_type); 7714 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7715 break; 7716 7717 case GAUDI_EVENT_TPC0_DEC: 7718 case GAUDI_EVENT_TPC1_DEC: 7719 case GAUDI_EVENT_TPC2_DEC: 7720 case GAUDI_EVENT_TPC3_DEC: 7721 case GAUDI_EVENT_TPC4_DEC: 7722 case GAUDI_EVENT_TPC5_DEC: 7723 case GAUDI_EVENT_TPC6_DEC: 7724 case GAUDI_EVENT_TPC7_DEC: 7725 /* In TPC DEC event, notify on TPC assertion. While there isn't 7726 * a specific event for assertion yet, the FW generates TPC DEC event. 7727 * The SW upper layer will inspect an internal mapped area to indicate 7728 * if the event is a TPC Assertion or a "real" TPC DEC. 
7729 */ 7730 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7731 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7732 reset_required = gaudi_tpc_read_interrupts(hdev, 7733 tpc_dec_event_to_tpc_id(event_type), 7734 "AXI_SLV_DEC_Error"); 7735 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7736 if (reset_required) { 7737 dev_err(hdev->dev, "reset required due to %s\n", 7738 gaudi_irq_map_table[event_type].name); 7739 7740 reset_direct = true; 7741 goto reset_device; 7742 } else { 7743 hl_fw_unmask_irq(hdev, event_type); 7744 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7745 } 7746 break; 7747 7748 case GAUDI_EVENT_TPC0_KRN_ERR: 7749 case GAUDI_EVENT_TPC1_KRN_ERR: 7750 case GAUDI_EVENT_TPC2_KRN_ERR: 7751 case GAUDI_EVENT_TPC3_KRN_ERR: 7752 case GAUDI_EVENT_TPC4_KRN_ERR: 7753 case GAUDI_EVENT_TPC5_KRN_ERR: 7754 case GAUDI_EVENT_TPC6_KRN_ERR: 7755 case GAUDI_EVENT_TPC7_KRN_ERR: 7756 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7757 reset_required = gaudi_tpc_read_interrupts(hdev, 7758 tpc_krn_event_to_tpc_id(event_type), 7759 "KRN_ERR"); 7760 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7761 if (reset_required) { 7762 dev_err(hdev->dev, "reset required due to %s\n", 7763 gaudi_irq_map_table[event_type].name); 7764 7765 reset_direct = true; 7766 goto reset_device; 7767 } else { 7768 hl_fw_unmask_irq(hdev, event_type); 7769 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7770 } 7771 break; 7772 7773 case GAUDI_EVENT_PCIE_CORE_SERR: 7774 case GAUDI_EVENT_PCIE_IF_SERR: 7775 case GAUDI_EVENT_PCIE_PHY_SERR: 7776 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7777 case GAUDI_EVENT_MME0_ACC_SERR: 7778 case GAUDI_EVENT_MME0_SBAB_SERR: 7779 case GAUDI_EVENT_MME1_ACC_SERR: 7780 case GAUDI_EVENT_MME1_SBAB_SERR: 7781 case GAUDI_EVENT_MME2_ACC_SERR: 7782 case GAUDI_EVENT_MME2_SBAB_SERR: 7783 case GAUDI_EVENT_MME3_ACC_SERR: 7784 case GAUDI_EVENT_MME3_SBAB_SERR: 7785 case GAUDI_EVENT_DMA0_SERR_ECC ... 
GAUDI_EVENT_DMA7_SERR_ECC: 7786 case GAUDI_EVENT_CPU_IF_ECC_SERR: 7787 case GAUDI_EVENT_PSOC_MEM_SERR: 7788 case GAUDI_EVENT_PSOC_CORESIGHT_SERR: 7789 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: 7790 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: 7791 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: 7792 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: 7793 fallthrough; 7794 case GAUDI_EVENT_MMU_SERR: 7795 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7796 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7797 hl_fw_unmask_irq(hdev, event_type); 7798 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7799 break; 7800 7801 case GAUDI_EVENT_PCIE_DEC: 7802 case GAUDI_EVENT_CPU_AXI_SPLITTER: 7803 case GAUDI_EVENT_PSOC_AXI_DEC: 7804 case GAUDI_EVENT_PSOC_PRSTN_FALL: 7805 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7806 hl_fw_unmask_irq(hdev, event_type); 7807 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7808 break; 7809 7810 case GAUDI_EVENT_MMU_PAGE_FAULT: 7811 case GAUDI_EVENT_MMU_WR_PERM: 7812 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7813 hl_fw_unmask_irq(hdev, event_type); 7814 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7815 break; 7816 7817 case GAUDI_EVENT_MME0_WBC_RSP: 7818 case GAUDI_EVENT_MME0_SBAB0_RSP: 7819 case GAUDI_EVENT_MME1_WBC_RSP: 7820 case GAUDI_EVENT_MME1_SBAB0_RSP: 7821 case GAUDI_EVENT_MME2_WBC_RSP: 7822 case GAUDI_EVENT_MME2_SBAB0_RSP: 7823 case GAUDI_EVENT_MME3_WBC_RSP: 7824 case GAUDI_EVENT_MME3_SBAB0_RSP: 7825 case GAUDI_EVENT_RAZWI_OR_ADC: 7826 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7827 case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: 7828 fallthrough; 7829 case GAUDI_EVENT_NIC0_QM0: 7830 case GAUDI_EVENT_NIC0_QM1: 7831 case GAUDI_EVENT_NIC1_QM0: 7832 case GAUDI_EVENT_NIC1_QM1: 7833 case GAUDI_EVENT_NIC2_QM0: 7834 case GAUDI_EVENT_NIC2_QM1: 7835 case GAUDI_EVENT_NIC3_QM0: 7836 case GAUDI_EVENT_NIC3_QM1: 7837 case GAUDI_EVENT_NIC4_QM0: 7838 case GAUDI_EVENT_NIC4_QM1: 7839 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: 7840 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7841 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7842 gaudi_handle_qman_err(hdev, event_type, &event_mask); 7843 hl_fw_unmask_irq(hdev, event_type); 7844 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); 7845 break; 7846 7847 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7848 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7849 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7850 goto reset_device; 7851 7852 case GAUDI_EVENT_TPC0_BMON_SPMU: 7853 case GAUDI_EVENT_TPC1_BMON_SPMU: 7854 case GAUDI_EVENT_TPC2_BMON_SPMU: 7855 case GAUDI_EVENT_TPC3_BMON_SPMU: 7856 case GAUDI_EVENT_TPC4_BMON_SPMU: 7857 case GAUDI_EVENT_TPC5_BMON_SPMU: 7858 case GAUDI_EVENT_TPC6_BMON_SPMU: 7859 case GAUDI_EVENT_TPC7_BMON_SPMU: 7860 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7861 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7862 hl_fw_unmask_irq(hdev, event_type); 7863 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7864 break; 7865 7866 case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4: 7867 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7868 hl_fw_unmask_irq(hdev, event_type); 7869 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7870 break; 7871 7872 case GAUDI_EVENT_DMA_IF_SEI_0 ... 
GAUDI_EVENT_DMA_IF_SEI_3: 7873 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7874 gaudi_print_sm_sei_info(hdev, event_type, 7875 &eq_entry->sm_sei_data); 7876 rc = hl_state_dump(hdev); 7877 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7878 if (rc) 7879 dev_err(hdev->dev, 7880 "Error during system state dump %d\n", rc); 7881 hl_fw_unmask_irq(hdev, event_type); 7882 break; 7883 7884 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: 7885 break; 7886 7887 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: 7888 gaudi_print_clk_change_info(hdev, event_type, &event_mask); 7889 hl_fw_unmask_irq(hdev, event_type); 7890 break; 7891 7892 case GAUDI_EVENT_PSOC_GPIO_U16_0: 7893 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; 7894 dev_err(hdev->dev, 7895 "Received high temp H/W interrupt %d (cause %d)\n", 7896 event_type, cause); 7897 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7898 break; 7899 7900 case GAUDI_EVENT_DEV_RESET_REQ: 7901 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7902 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7903 goto reset_device; 7904 7905 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7906 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7907 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7908 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7909 goto reset_device; 7910 7911 case GAUDI_EVENT_FW_ALIVE_S: 7912 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7913 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7914 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7915 goto reset_device; 7916 7917 default: 7918 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", 7919 event_type); 7920 break; 7921 } 7922 7923 if (event_mask) 7924 hl_notifier_event_send_all(hdev, event_mask); 7925 7926 return; 7927 7928 reset_device: 7929 reset_required = true; 7930 7931 if (hdev->asic_prop.fw_security_enabled && !reset_direct) { 7932 flags = HL_DRV_RESET_HARD | 
HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag; 7933 7934 /* notify on device unavailable while the reset triggered by fw */ 7935 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | 7936 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); 7937 } else if (hdev->hard_reset_on_fw_events) { 7938 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag; 7939 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7940 } else { 7941 reset_required = false; 7942 } 7943 7944 if (reset_required) { 7945 hl_device_cond_reset(hdev, flags, event_mask); 7946 } else { 7947 hl_fw_unmask_irq(hdev, event_type); 7948 /* Notification on occurred event needs to be sent although reset is not executed */ 7949 if (event_mask) 7950 hl_notifier_event_send_all(hdev, event_mask); 7951 } 7952 } 7953 7954 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7955 { 7956 struct gaudi_device *gaudi = hdev->asic_specific; 7957 7958 if (aggregate) { 7959 *size = (u32) sizeof(gaudi->events_stat_aggregate); 7960 return gaudi->events_stat_aggregate; 7961 } 7962 7963 *size = (u32) sizeof(gaudi->events_stat); 7964 return gaudi->events_stat; 7965 } 7966 7967 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 7968 { 7969 struct gaudi_device *gaudi = hdev->asic_specific; 7970 u32 status, timeout_usec; 7971 int rc; 7972 7973 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || 7974 hdev->reset_info.hard_reset_pending) 7975 return 0; 7976 7977 if (hdev->pldm) 7978 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7979 else 7980 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7981 7982 /* L0 & L1 invalidation */ 7983 WREG32(mmSTLB_INV_PS, 3); 7984 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); 7985 WREG32(mmSTLB_INV_PS, 2); 7986 7987 rc = hl_poll_timeout( 7988 hdev, 7989 mmSTLB_INV_PS, 7990 status, 7991 !status, 7992 1000, 7993 timeout_usec); 7994 7995 WREG32(mmSTLB_INV_SET, 0); 7996 7997 return rc; 7998 } 7999 8000 static int gaudi_mmu_invalidate_cache_range(struct 
hl_device *hdev, 8001 bool is_hard, u32 flags, 8002 u32 asid, u64 va, u64 size) 8003 { 8004 /* Treat as invalidate all because there is no range invalidation 8005 * in Gaudi 8006 */ 8007 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 8008 } 8009 8010 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr) 8011 { 8012 u32 status, timeout_usec; 8013 int rc; 8014 8015 if (hdev->pldm) 8016 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 8017 else 8018 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 8019 8020 WREG32(MMU_ASID, asid); 8021 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 8022 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); 8023 WREG32(MMU_BUSY, 0x80000000); 8024 8025 rc = hl_poll_timeout( 8026 hdev, 8027 MMU_BUSY, 8028 status, 8029 !(status & 0x80000000), 8030 1000, 8031 timeout_usec); 8032 8033 if (rc) { 8034 dev_err(hdev->dev, 8035 "Timeout during MMU hop0 config of asid %d\n", asid); 8036 return rc; 8037 } 8038 8039 return 0; 8040 } 8041 8042 static int gaudi_send_heartbeat(struct hl_device *hdev) 8043 { 8044 struct gaudi_device *gaudi = hdev->asic_specific; 8045 8046 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8047 return 0; 8048 8049 return hl_fw_send_heartbeat(hdev); 8050 } 8051 8052 static int gaudi_cpucp_info_get(struct hl_device *hdev) 8053 { 8054 struct gaudi_device *gaudi = hdev->asic_specific; 8055 struct asic_fixed_properties *prop = &hdev->asic_prop; 8056 int rc; 8057 8058 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8059 return 0; 8060 8061 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, 8062 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 8063 mmCPU_BOOT_ERR1); 8064 if (rc) 8065 return rc; 8066 8067 if (!strlen(prop->cpucp_info.card_name)) 8068 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 8069 CARD_NAME_MAX_LEN); 8070 8071 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); 8072 8073 set_default_power_values(hdev); 8074 8075 
return 0; 8076 } 8077 8078 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 8079 struct engines_data *e) 8080 { 8081 struct gaudi_device *gaudi = hdev->asic_specific; 8082 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; 8083 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; 8084 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n"; 8085 unsigned long *mask = (unsigned long *)mask_arr; 8086 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; 8087 bool is_idle = true, is_eng_idle, is_slave; 8088 u64 offset; 8089 int i, dma_id, port; 8090 8091 if (e) 8092 hl_engine_data_sprintf(e, 8093 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8094 "--- ------- ------------ ---------- -------------\n"); 8095 8096 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { 8097 dma_id = gaudi_dma_assignment[i]; 8098 offset = dma_id * DMA_QMAN_OFFSET; 8099 8100 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); 8101 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); 8102 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); 8103 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8104 IS_DMA_IDLE(dma_core_sts0); 8105 is_idle &= is_eng_idle; 8106 8107 if (mask && !is_eng_idle) 8108 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8109 if (e) 8110 hl_engine_data_sprintf(e, fmt, dma_id, 8111 is_eng_idle ? 
"Y" : "N", qm_glbl_sts0, 8112 qm_cgm_sts, dma_core_sts0); 8113 } 8114 8115 if (e) 8116 hl_engine_data_sprintf(e, 8117 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8118 "--- ------- ------------ ---------- ----------\n"); 8119 8120 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 8121 offset = i * TPC_QMAN_OFFSET; 8122 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); 8123 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); 8124 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); 8125 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8126 IS_TPC_IDLE(tpc_cfg_sts); 8127 is_idle &= is_eng_idle; 8128 8129 if (mask && !is_eng_idle) 8130 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8131 if (e) 8132 hl_engine_data_sprintf(e, fmt, i, 8133 is_eng_idle ? "Y" : "N", 8134 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8135 } 8136 8137 if (e) 8138 hl_engine_data_sprintf(e, 8139 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8140 "--- ------- ------------ ---------- -----------\n"); 8141 8142 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { 8143 offset = i * MME_QMAN_OFFSET; 8144 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); 8145 is_eng_idle = IS_MME_IDLE(mme_arch_sts); 8146 8147 /* MME 1 & 3 are slaves, no need to check their QMANs */ 8148 is_slave = i % 2; 8149 if (!is_slave) { 8150 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); 8151 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); 8152 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8153 } 8154 8155 is_idle &= is_eng_idle; 8156 8157 if (mask && !is_eng_idle) 8158 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8159 if (e) { 8160 if (!is_slave) 8161 hl_engine_data_sprintf(e, fmt, i, 8162 is_eng_idle ? "Y" : "N", 8163 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8164 else 8165 hl_engine_data_sprintf(e, mme_slave_fmt, i, 8166 is_eng_idle ? 
"Y" : "N", "-", 8167 "-", mme_arch_sts); 8168 } 8169 } 8170 8171 if (e) 8172 hl_engine_data_sprintf(e, 8173 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8174 "--- ------- ------------ ----------\n"); 8175 8176 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { 8177 offset = i * NIC_MACRO_QMAN_OFFSET; 8178 port = 2 * i; 8179 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8180 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 8181 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 8182 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8183 is_idle &= is_eng_idle; 8184 8185 if (mask && !is_eng_idle) 8186 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8187 if (e) 8188 hl_engine_data_sprintf(e, nic_fmt, port, 8189 is_eng_idle ? "Y" : "N", 8190 qm_glbl_sts0, qm_cgm_sts); 8191 } 8192 8193 port = 2 * i + 1; 8194 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8195 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset); 8196 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset); 8197 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8198 is_idle &= is_eng_idle; 8199 8200 if (mask && !is_eng_idle) 8201 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8202 if (e) 8203 hl_engine_data_sprintf(e, nic_fmt, port, 8204 is_eng_idle ? 
"Y" : "N", 8205 qm_glbl_sts0, qm_cgm_sts); 8206 } 8207 } 8208 8209 if (e) 8210 hl_engine_data_sprintf(e, "\n"); 8211 8212 return is_idle; 8213 } 8214 8215 static void gaudi_hw_queues_lock(struct hl_device *hdev) 8216 __acquires(&gaudi->hw_queues_lock) 8217 { 8218 struct gaudi_device *gaudi = hdev->asic_specific; 8219 8220 spin_lock(&gaudi->hw_queues_lock); 8221 } 8222 8223 static void gaudi_hw_queues_unlock(struct hl_device *hdev) 8224 __releases(&gaudi->hw_queues_lock) 8225 { 8226 struct gaudi_device *gaudi = hdev->asic_specific; 8227 8228 spin_unlock(&gaudi->hw_queues_lock); 8229 } 8230 8231 static u32 gaudi_get_pci_id(struct hl_device *hdev) 8232 { 8233 return hdev->pdev->device; 8234 } 8235 8236 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, 8237 size_t max_size) 8238 { 8239 struct gaudi_device *gaudi = hdev->asic_specific; 8240 8241 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8242 return 0; 8243 8244 return hl_fw_get_eeprom_data(hdev, data, max_size); 8245 } 8246 8247 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) 8248 { 8249 struct gaudi_device *gaudi = hdev->asic_specific; 8250 8251 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8252 return 0; 8253 8254 return hl_fw_get_monitor_dump(hdev, data); 8255 } 8256 8257 /* 8258 * this function should be used only during initialization and/or after reset, 8259 * when there are no active users. 
8260 */ 8261 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) 8262 { 8263 u64 kernel_timeout; 8264 u32 status, offset; 8265 int rc; 8266 8267 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); 8268 8269 if (hdev->pldm) 8270 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; 8271 else 8272 kernel_timeout = HL_DEVICE_TIMEOUT_USEC; 8273 8274 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, 8275 lower_32_bits(tpc_kernel)); 8276 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, 8277 upper_32_bits(tpc_kernel)); 8278 8279 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, 8280 lower_32_bits(tpc_kernel)); 8281 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, 8282 upper_32_bits(tpc_kernel)); 8283 /* set a valid LUT pointer, content is of no significance */ 8284 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, 8285 lower_32_bits(tpc_kernel)); 8286 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, 8287 upper_32_bits(tpc_kernel)); 8288 8289 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, 8290 lower_32_bits(CFG_BASE + 8291 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); 8292 8293 WREG32(mmTPC0_CFG_TPC_CMD + offset, 8294 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | 8295 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); 8296 /* wait a bit for the engine to start executing */ 8297 usleep_range(1000, 1500); 8298 8299 /* wait until engine has finished executing */ 8300 rc = hl_poll_timeout( 8301 hdev, 8302 mmTPC0_CFG_STATUS + offset, 8303 status, 8304 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8305 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8306 1000, 8307 kernel_timeout); 8308 8309 if (rc) { 8310 dev_err(hdev->dev, 8311 "Timeout while waiting for TPC%d icache prefetch\n", 8312 tpc_id); 8313 return -EIO; 8314 } 8315 8316 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, 8317 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); 8318 8319 /* wait a bit for the engine to start executing */ 8320 usleep_range(1000, 1500); 8321 8322 /* 
wait until engine has finished executing */ 8323 rc = hl_poll_timeout( 8324 hdev, 8325 mmTPC0_CFG_STATUS + offset, 8326 status, 8327 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8328 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8329 1000, 8330 kernel_timeout); 8331 8332 if (rc) { 8333 dev_err(hdev->dev, 8334 "Timeout while waiting for TPC%d vector pipe\n", 8335 tpc_id); 8336 return -EIO; 8337 } 8338 8339 rc = hl_poll_timeout( 8340 hdev, 8341 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, 8342 status, 8343 (status == 0), 8344 1000, 8345 kernel_timeout); 8346 8347 if (rc) { 8348 dev_err(hdev->dev, 8349 "Timeout while waiting for TPC%d kernel to execute\n", 8350 tpc_id); 8351 return -EIO; 8352 } 8353 8354 return 0; 8355 } 8356 8357 static int gaudi_internal_cb_pool_init(struct hl_device *hdev, 8358 struct hl_ctx *ctx) 8359 { 8360 struct gaudi_device *gaudi = hdev->asic_specific; 8361 int min_alloc_order, rc, collective_cb_size; 8362 8363 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8364 return 0; 8365 8366 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 8367 HOST_SPACE_INTERNAL_CB_SZ, 8368 &hdev->internal_cb_pool_dma_addr, 8369 GFP_KERNEL | __GFP_ZERO); 8370 8371 if (!hdev->internal_cb_pool_virt_addr) 8372 return -ENOMEM; 8373 8374 collective_cb_size = sizeof(struct packet_msg_short) * 5 + 8375 sizeof(struct packet_fence); 8376 min_alloc_order = ilog2(collective_cb_size); 8377 8378 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 8379 if (!hdev->internal_cb_pool) { 8380 dev_err(hdev->dev, 8381 "Failed to create internal CB pool\n"); 8382 rc = -ENOMEM; 8383 goto free_internal_cb_pool; 8384 } 8385 8386 rc = gen_pool_add(hdev->internal_cb_pool, 8387 (uintptr_t) hdev->internal_cb_pool_virt_addr, 8388 HOST_SPACE_INTERNAL_CB_SZ, -1); 8389 if (rc) { 8390 dev_err(hdev->dev, 8391 "Failed to add memory to internal CB pool\n"); 8392 rc = -EFAULT; 8393 goto destroy_internal_cb_pool; 8394 } 8395 8396 hdev->internal_cb_va_base = 
hl_reserve_va_block(hdev, ctx, 8397 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ, 8398 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 8399 8400 if (!hdev->internal_cb_va_base) { 8401 rc = -ENOMEM; 8402 goto destroy_internal_cb_pool; 8403 } 8404 8405 mutex_lock(&hdev->mmu_lock); 8406 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8407 hdev->internal_cb_pool_dma_addr, 8408 HOST_SPACE_INTERNAL_CB_SZ); 8409 8410 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8411 mutex_unlock(&hdev->mmu_lock); 8412 8413 if (rc) 8414 goto unreserve_internal_cb_pool; 8415 8416 return 0; 8417 8418 unreserve_internal_cb_pool: 8419 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8420 HOST_SPACE_INTERNAL_CB_SZ); 8421 destroy_internal_cb_pool: 8422 gen_pool_destroy(hdev->internal_cb_pool); 8423 free_internal_cb_pool: 8424 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8425 hdev->internal_cb_pool_dma_addr); 8426 8427 return rc; 8428 } 8429 8430 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, 8431 struct hl_ctx *ctx) 8432 { 8433 struct gaudi_device *gaudi = hdev->asic_specific; 8434 8435 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8436 return; 8437 8438 mutex_lock(&hdev->mmu_lock); 8439 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8440 HOST_SPACE_INTERNAL_CB_SZ); 8441 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8442 HOST_SPACE_INTERNAL_CB_SZ); 8443 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8444 mutex_unlock(&hdev->mmu_lock); 8445 8446 gen_pool_destroy(hdev->internal_cb_pool); 8447 8448 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8449 hdev->internal_cb_pool_dma_addr); 8450 } 8451 8452 static int gaudi_ctx_init(struct hl_ctx *ctx) 8453 { 8454 int rc; 8455 8456 if (ctx->asid == HL_KERNEL_ASID_ID) 8457 return 0; 8458 8459 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); 8460 if (rc) 8461 return rc; 8462 8463 rc = 
gaudi_restore_user_registers(ctx->hdev); 8464 if (rc) 8465 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8466 8467 return rc; 8468 } 8469 8470 static void gaudi_ctx_fini(struct hl_ctx *ctx) 8471 { 8472 if (ctx->asid == HL_KERNEL_ASID_ID) 8473 return; 8474 8475 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8476 } 8477 8478 static int gaudi_pre_schedule_cs(struct hl_cs *cs) 8479 { 8480 return 0; 8481 } 8482 8483 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 8484 { 8485 return gaudi_cq_assignment[cq_idx]; 8486 } 8487 8488 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) 8489 { 8490 return sizeof(struct packet_msg_short) + 8491 sizeof(struct packet_msg_prot) * 2; 8492 } 8493 8494 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) 8495 { 8496 return sizeof(struct packet_msg_short) * 4 + 8497 sizeof(struct packet_fence) + 8498 sizeof(struct packet_msg_prot) * 2; 8499 } 8500 8501 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) 8502 { 8503 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); 8504 } 8505 8506 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 8507 u32 size, bool eb) 8508 { 8509 struct hl_cb *cb = (struct hl_cb *) data; 8510 struct packet_msg_short *pkt; 8511 u32 value, ctl, pkt_size = sizeof(*pkt); 8512 8513 pkt = cb->kernel_address + size; 8514 memset(pkt, 0, pkt_size); 8515 8516 /* Inc by 1, Mode ADD */ 8517 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 8518 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 8519 8520 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 8521 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8522 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ 8523 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8524 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb); 8525 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8526 ctl |= 
FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8527 8528 pkt->value = cpu_to_le32(value); 8529 pkt->ctl = cpu_to_le32(ctl); 8530 8531 return size + pkt_size; 8532 } 8533 8534 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, 8535 u16 addr) 8536 { 8537 u32 ctl, pkt_size = sizeof(*pkt); 8538 8539 memset(pkt, 0, pkt_size); 8540 8541 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); 8542 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8543 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8544 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8545 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8546 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */ 8547 8548 pkt->value = cpu_to_le32(value); 8549 pkt->ctl = cpu_to_le32(ctl); 8550 8551 return pkt_size; 8552 } 8553 8554 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev, 8555 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask, 8556 u16 sob_val, u16 mon_id) 8557 { 8558 u64 monitor_base; 8559 u32 ctl, value, pkt_size = sizeof(*pkt); 8560 u16 msg_addr_offset; 8561 u8 mask; 8562 8563 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 8564 dev_err(hdev->dev, 8565 "sob_base %u (mask %#x) is not valid\n", 8566 sob_base, sob_mask); 8567 return 0; 8568 } 8569 8570 /* 8571 * monitor_base should be the content of the base0 address registers, 8572 * so it will be added to the msg short offsets 8573 */ 8574 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8575 8576 msg_addr_offset = 8577 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - 8578 monitor_base; 8579 8580 memset(pkt, 0, pkt_size); 8581 8582 /* Monitor config packet: bind the monitor to a sync object */ 8583 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 8584 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 8585 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, 8586 0); /* GREATER OR EQUAL*/ 8587 value |= 
FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); 8588 8589 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset); 8590 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8591 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8592 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8593 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8594 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8595 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8596 8597 pkt->value = cpu_to_le32(value); 8598 pkt->ctl = cpu_to_le32(ctl); 8599 8600 return pkt_size; 8601 } 8602 8603 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) 8604 { 8605 u32 ctl, cfg, pkt_size = sizeof(*pkt); 8606 8607 memset(pkt, 0, pkt_size); 8608 8609 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 8610 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 8611 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2); 8612 8613 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 8614 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8615 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8616 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8617 8618 pkt->cfg = cpu_to_le32(cfg); 8619 pkt->ctl = cpu_to_le32(ctl); 8620 8621 return pkt_size; 8622 } 8623 8624 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr) 8625 { 8626 u32 offset, nic_index; 8627 8628 switch (queue_id) { 8629 case GAUDI_QUEUE_ID_DMA_0_0: 8630 offset = mmDMA0_QM_CP_FENCE2_RDATA_0; 8631 break; 8632 case GAUDI_QUEUE_ID_DMA_0_1: 8633 offset = mmDMA0_QM_CP_FENCE2_RDATA_1; 8634 break; 8635 case GAUDI_QUEUE_ID_DMA_0_2: 8636 offset = mmDMA0_QM_CP_FENCE2_RDATA_2; 8637 break; 8638 case GAUDI_QUEUE_ID_DMA_0_3: 8639 offset = mmDMA0_QM_CP_FENCE2_RDATA_3; 8640 break; 8641 case GAUDI_QUEUE_ID_DMA_1_0: 8642 offset = mmDMA1_QM_CP_FENCE2_RDATA_0; 8643 break; 8644 case GAUDI_QUEUE_ID_DMA_1_1: 8645 offset = mmDMA1_QM_CP_FENCE2_RDATA_1; 8646 break; 8647 case 
GAUDI_QUEUE_ID_DMA_1_2: 8648 offset = mmDMA1_QM_CP_FENCE2_RDATA_2; 8649 break; 8650 case GAUDI_QUEUE_ID_DMA_1_3: 8651 offset = mmDMA1_QM_CP_FENCE2_RDATA_3; 8652 break; 8653 case GAUDI_QUEUE_ID_DMA_5_0: 8654 offset = mmDMA5_QM_CP_FENCE2_RDATA_0; 8655 break; 8656 case GAUDI_QUEUE_ID_DMA_5_1: 8657 offset = mmDMA5_QM_CP_FENCE2_RDATA_1; 8658 break; 8659 case GAUDI_QUEUE_ID_DMA_5_2: 8660 offset = mmDMA5_QM_CP_FENCE2_RDATA_2; 8661 break; 8662 case GAUDI_QUEUE_ID_DMA_5_3: 8663 offset = mmDMA5_QM_CP_FENCE2_RDATA_3; 8664 break; 8665 case GAUDI_QUEUE_ID_TPC_7_0: 8666 offset = mmTPC7_QM_CP_FENCE2_RDATA_0; 8667 break; 8668 case GAUDI_QUEUE_ID_TPC_7_1: 8669 offset = mmTPC7_QM_CP_FENCE2_RDATA_1; 8670 break; 8671 case GAUDI_QUEUE_ID_TPC_7_2: 8672 offset = mmTPC7_QM_CP_FENCE2_RDATA_2; 8673 break; 8674 case GAUDI_QUEUE_ID_TPC_7_3: 8675 offset = mmTPC7_QM_CP_FENCE2_RDATA_3; 8676 break; 8677 case GAUDI_QUEUE_ID_NIC_0_0: 8678 case GAUDI_QUEUE_ID_NIC_1_0: 8679 case GAUDI_QUEUE_ID_NIC_2_0: 8680 case GAUDI_QUEUE_ID_NIC_3_0: 8681 case GAUDI_QUEUE_ID_NIC_4_0: 8682 case GAUDI_QUEUE_ID_NIC_5_0: 8683 case GAUDI_QUEUE_ID_NIC_6_0: 8684 case GAUDI_QUEUE_ID_NIC_7_0: 8685 case GAUDI_QUEUE_ID_NIC_8_0: 8686 case GAUDI_QUEUE_ID_NIC_9_0: 8687 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2; 8688 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 + 8689 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8690 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8691 break; 8692 case GAUDI_QUEUE_ID_NIC_0_1: 8693 case GAUDI_QUEUE_ID_NIC_1_1: 8694 case GAUDI_QUEUE_ID_NIC_2_1: 8695 case GAUDI_QUEUE_ID_NIC_3_1: 8696 case GAUDI_QUEUE_ID_NIC_4_1: 8697 case GAUDI_QUEUE_ID_NIC_5_1: 8698 case GAUDI_QUEUE_ID_NIC_6_1: 8699 case GAUDI_QUEUE_ID_NIC_7_1: 8700 case GAUDI_QUEUE_ID_NIC_8_1: 8701 case GAUDI_QUEUE_ID_NIC_9_1: 8702 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2; 8703 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 + 8704 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8705 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8706 break; 8707 case 
GAUDI_QUEUE_ID_NIC_0_2: 8708 case GAUDI_QUEUE_ID_NIC_1_2: 8709 case GAUDI_QUEUE_ID_NIC_2_2: 8710 case GAUDI_QUEUE_ID_NIC_3_2: 8711 case GAUDI_QUEUE_ID_NIC_4_2: 8712 case GAUDI_QUEUE_ID_NIC_5_2: 8713 case GAUDI_QUEUE_ID_NIC_6_2: 8714 case GAUDI_QUEUE_ID_NIC_7_2: 8715 case GAUDI_QUEUE_ID_NIC_8_2: 8716 case GAUDI_QUEUE_ID_NIC_9_2: 8717 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2; 8718 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 + 8719 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8720 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8721 break; 8722 case GAUDI_QUEUE_ID_NIC_0_3: 8723 case GAUDI_QUEUE_ID_NIC_1_3: 8724 case GAUDI_QUEUE_ID_NIC_2_3: 8725 case GAUDI_QUEUE_ID_NIC_3_3: 8726 case GAUDI_QUEUE_ID_NIC_4_3: 8727 case GAUDI_QUEUE_ID_NIC_5_3: 8728 case GAUDI_QUEUE_ID_NIC_6_3: 8729 case GAUDI_QUEUE_ID_NIC_7_3: 8730 case GAUDI_QUEUE_ID_NIC_8_3: 8731 case GAUDI_QUEUE_ID_NIC_9_3: 8732 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2; 8733 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 + 8734 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8735 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8736 break; 8737 default: 8738 return -EINVAL; 8739 } 8740 8741 *addr = CFG_BASE + offset; 8742 8743 return 0; 8744 } 8745 8746 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr) 8747 { 8748 u64 monitor_base; 8749 u32 size = 0; 8750 u16 msg_addr_offset; 8751 8752 /* 8753 * monitor_base should be the content of the base0 address registers, 8754 * so it will be added to the msg short offsets 8755 */ 8756 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8757 8758 /* First monitor config packet: low address of the sync */ 8759 msg_addr_offset = 8760 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) - 8761 monitor_base; 8762 8763 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr, 8764 msg_addr_offset); 8765 8766 /* Second monitor config packet: high address of the sync */ 8767 msg_addr_offset = 8768 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + 
mon_id * 4) - 8769 monitor_base; 8770 8771 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), 8772 msg_addr_offset); 8773 8774 /* 8775 * Third monitor config packet: the payload, i.e. what to write when the 8776 * sync triggers 8777 */ 8778 msg_addr_offset = 8779 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) - 8780 monitor_base; 8781 8782 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset); 8783 8784 return size; 8785 } 8786 8787 static u32 gaudi_gen_wait_cb(struct hl_device *hdev, 8788 struct hl_gen_wait_properties *prop) 8789 { 8790 struct hl_cb *cb = (struct hl_cb *) prop->data; 8791 void *buf = cb->kernel_address; 8792 u64 fence_addr = 0; 8793 u32 size = prop->size; 8794 8795 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) { 8796 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n", 8797 prop->q_idx); 8798 return 0; 8799 } 8800 8801 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr); 8802 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, 8803 prop->sob_mask, prop->sob_val, prop->mon_id); 8804 size += gaudi_add_fence_pkt(buf + size); 8805 8806 return size; 8807 } 8808 8809 static void gaudi_reset_sob(struct hl_device *hdev, void *data) 8810 { 8811 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data; 8812 8813 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, 8814 hw_sob->sob_id); 8815 8816 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 8817 hw_sob->sob_id * 4, 0); 8818 8819 kref_init(&hw_sob->kref); 8820 } 8821 8822 static u64 gaudi_get_device_time(struct hl_device *hdev) 8823 { 8824 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32; 8825 8826 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL); 8827 } 8828 8829 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr, 8830 u32 *block_size, u32 *block_id) 8831 { 8832 return -EPERM; 8833 } 8834 8835 static int gaudi_block_mmap(struct hl_device *hdev, 8836 struct 
vm_area_struct *vma, 8837 u32 block_id, u32 block_size) 8838 { 8839 return -EPERM; 8840 } 8841 8842 static void gaudi_enable_events_from_fw(struct hl_device *hdev) 8843 { 8844 struct cpu_dyn_regs *dyn_regs = 8845 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 8846 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 8847 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 8848 le32_to_cpu(dyn_regs->gic_host_ints_irq); 8849 8850 WREG32(irq_handler_offset, 8851 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id); 8852 } 8853 8854 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask) 8855 { 8856 return -EINVAL; 8857 } 8858 8859 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx) 8860 { 8861 switch (pll_idx) { 8862 case HL_GAUDI_CPU_PLL: return CPU_PLL; 8863 case HL_GAUDI_PCI_PLL: return PCI_PLL; 8864 case HL_GAUDI_NIC_PLL: return NIC_PLL; 8865 case HL_GAUDI_DMA_PLL: return DMA_PLL; 8866 case HL_GAUDI_MESH_PLL: return MESH_PLL; 8867 case HL_GAUDI_MME_PLL: return MME_PLL; 8868 case HL_GAUDI_TPC_PLL: return TPC_PLL; 8869 case HL_GAUDI_IF_PLL: return IF_PLL; 8870 case HL_GAUDI_SRAM_PLL: return SRAM_PLL; 8871 case HL_GAUDI_HBM_PLL: return HBM_PLL; 8872 default: return -EINVAL; 8873 } 8874 } 8875 8876 static int gaudi_add_sync_to_engine_map_entry( 8877 struct hl_sync_to_engine_map *map, u32 reg_value, 8878 enum hl_sync_engine_type engine_type, u32 engine_id) 8879 { 8880 struct hl_sync_to_engine_map_entry *entry; 8881 8882 /* Reg value represents a partial address of sync object, 8883 * it is used as unique identifier. For this we need to 8884 * clear the cutoff cfg base bits from the value. 
8885 */ 8886 if (reg_value == 0 || reg_value == 0xffffffff) 8887 return 0; 8888 reg_value -= lower_32_bits(CFG_BASE); 8889 8890 /* create a new hash entry */ 8891 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 8892 if (!entry) 8893 return -ENOMEM; 8894 entry->engine_type = engine_type; 8895 entry->engine_id = engine_id; 8896 entry->sync_id = reg_value; 8897 hash_add(map->tb, &entry->node, reg_value); 8898 8899 return 0; 8900 } 8901 8902 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev, 8903 struct hl_sync_to_engine_map *map) 8904 { 8905 struct hl_state_dump_specs *sds = &hdev->state_dump_specs; 8906 int i, j, rc; 8907 u32 reg_value; 8908 8909 /* Iterate over TPC engines */ 8910 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) { 8911 8912 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] + 8913 sds->props[SP_NEXT_TPC] * i); 8914 8915 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value, 8916 ENGINE_TPC, i); 8917 if (rc) 8918 goto free_sync_to_engine_map; 8919 } 8920 8921 /* Iterate over MME engines */ 8922 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) { 8923 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) { 8924 8925 reg_value = RREG32(sds->props[SP_MME_CFG_SO] + 8926 sds->props[SP_NEXT_MME] * i + 8927 j * sizeof(u32)); 8928 8929 rc = gaudi_add_sync_to_engine_map_entry( 8930 map, reg_value, ENGINE_MME, 8931 i * sds->props[SP_SUB_MME_ENG_NUM] + j); 8932 if (rc) 8933 goto free_sync_to_engine_map; 8934 } 8935 } 8936 8937 /* Iterate over DMA engines */ 8938 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) { 8939 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] + 8940 sds->props[SP_DMA_QUEUES_OFFSET] * i); 8941 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value, 8942 ENGINE_DMA, i); 8943 if (rc) 8944 goto free_sync_to_engine_map; 8945 } 8946 8947 return 0; 8948 8949 free_sync_to_engine_map: 8950 hl_state_dump_free_sync_to_engine_map(map); 8951 8952 return rc; 8953 } 8954 8955 static int gaudi_monitor_valid(struct hl_mon_state_dump 
*mon) 8956 { 8957 return FIELD_GET( 8958 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK, 8959 mon->status); 8960 } 8961 8962 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon) 8963 { 8964 const size_t max_write = 10; 8965 u32 gid, mask, sob; 8966 int i, offset; 8967 8968 /* Sync object ID is calculated as follows: 8969 * (8 * group_id + cleared bits in mask) 8970 */ 8971 gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK, 8972 mon->arm_data); 8973 mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK, 8974 mon->arm_data); 8975 8976 for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE - 8977 max_write; mask >>= 1, i++) { 8978 if (!(mask & 1)) { 8979 sob = gid * MONITOR_MAX_SOBS + i; 8980 8981 if (offset > 0) 8982 offset += snprintf(sobs + offset, max_write, 8983 ", "); 8984 8985 offset += snprintf(sobs + offset, max_write, "%u", sob); 8986 } 8987 } 8988 } 8989 8990 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset, 8991 struct hl_device *hdev, 8992 struct hl_mon_state_dump *mon) 8993 { 8994 const char *name; 8995 char scratch_buf1[BIN_REG_STRING_SIZE], 8996 scratch_buf2[BIN_REG_STRING_SIZE]; 8997 char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0}; 8998 8999 name = hl_state_dump_get_monitor_name(hdev, mon); 9000 if (!name) 9001 name = ""; 9002 9003 gaudi_fill_sobs_from_mon(monitored_sobs, mon); 9004 9005 return hl_snprintf_resize( 9006 buf, size, offset, 9007 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. 
Means sync objects [%s] are being monitored.", 9008 mon->id, name, 9009 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK, 9010 mon->arm_data), 9011 hl_format_as_binary( 9012 scratch_buf1, sizeof(scratch_buf1), 9013 FIELD_GET( 9014 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK, 9015 mon->arm_data)), 9016 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK, 9017 mon->arm_data), 9018 mon->wr_data, 9019 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low, 9020 hl_format_as_binary( 9021 scratch_buf2, sizeof(scratch_buf2), 9022 FIELD_GET( 9023 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK, 9024 mon->status)), 9025 monitored_sobs); 9026 } 9027 9028 9029 static int gaudi_print_fences_single_engine( 9030 struct hl_device *hdev, u64 base_offset, u64 status_base_offset, 9031 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf, 9032 size_t *size, size_t *offset) 9033 { 9034 struct hl_state_dump_specs *sds = &hdev->state_dump_specs; 9035 int rc = -ENOMEM, i; 9036 u32 *statuses, *fences; 9037 9038 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES], 9039 sizeof(*statuses), GFP_KERNEL); 9040 if (!statuses) 9041 goto out; 9042 9043 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] * 9044 sds->props[SP_ENGINE_NUM_OF_QUEUES], 9045 sizeof(*fences), GFP_KERNEL); 9046 if (!fences) 9047 goto free_status; 9048 9049 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i) 9050 statuses[i] = RREG32(status_base_offset + i * sizeof(u32)); 9051 9052 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] * 9053 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) 9054 fences[i] = RREG32(base_offset + i * sizeof(u32)); 9055 9056 /* The actual print */ 9057 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) { 9058 u32 fence_id; 9059 u64 fence_cnt, fence_rdata; 9060 const char *engine_name; 9061 9062 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK, 9063 statuses[i])) 9064 continue; 9065 9066 fence_id = 9067 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, 
statuses[i]); 9068 fence_cnt = base_offset + CFG_BASE + 9069 sizeof(u32) * 9070 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]); 9071 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] + 9072 sds->props[SP_FENCE0_RDATA_OFFSET]; 9073 engine_name = hl_sync_engine_to_string(engine_type); 9074 9075 rc = hl_snprintf_resize( 9076 buf, size, offset, 9077 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n", 9078 engine_name, engine_id, 9079 i, fence_id, 9080 fence_cnt, engine_name, engine_id, fence_id, i, 9081 fence_rdata, engine_name, engine_id, fence_id, i, 9082 fences[fence_id], 9083 statuses[i]); 9084 if (rc) 9085 goto free_fences; 9086 } 9087 9088 rc = 0; 9089 9090 free_fences: 9091 kfree(fences); 9092 free_status: 9093 kfree(statuses); 9094 out: 9095 return rc; 9096 } 9097 9098 9099 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = { 9100 .monitor_valid = gaudi_monitor_valid, 9101 .print_single_monitor = gaudi_print_single_monitor, 9102 .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map, 9103 .print_fences_single_engine = gaudi_print_fences_single_engine, 9104 }; 9105 9106 static void gaudi_state_dump_init(struct hl_device *hdev) 9107 { 9108 struct hl_state_dump_specs *sds = &hdev->state_dump_specs; 9109 int i; 9110 9111 for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i) 9112 hash_add(sds->so_id_to_str_tb, 9113 &gaudi_so_id_to_str[i].node, 9114 gaudi_so_id_to_str[i].id); 9115 9116 for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i) 9117 hash_add(sds->monitor_id_to_str_tb, 9118 &gaudi_monitor_id_to_str[i].node, 9119 gaudi_monitor_id_to_str[i].id); 9120 9121 sds->props = gaudi_state_dump_specs_props; 9122 9123 sds->sync_namager_names = gaudi_sync_manager_names; 9124 9125 sds->funcs = gaudi_state_dump_funcs; 9126 } 9127 9128 static u32 *gaudi_get_stream_master_qid_arr(void) 9129 { 9130 return gaudi_stream_master; 9131 } 9132 9133 static 
int gaudi_set_dram_properties(struct hl_device *hdev) 9134 { 9135 return 0; 9136 } 9137 9138 static int gaudi_set_binning_masks(struct hl_device *hdev) 9139 { 9140 return 0; 9141 } 9142 9143 static void gaudi_check_if_razwi_happened(struct hl_device *hdev) 9144 { 9145 } 9146 9147 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf) 9148 { 9149 struct hl_device *hdev = dev_get_drvdata(dev); 9150 struct cpucp_info *cpucp_info; 9151 9152 cpucp_info = &hdev->asic_prop.cpucp_info; 9153 9154 return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version)); 9155 } 9156 9157 static DEVICE_ATTR_RO(infineon_ver); 9158 9159 static struct attribute *gaudi_vrm_dev_attrs[] = { 9160 &dev_attr_infineon_ver.attr, 9161 NULL, 9162 }; 9163 9164 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, 9165 struct attribute_group *dev_vrm_attr_grp) 9166 { 9167 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp); 9168 dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs; 9169 } 9170 9171 static int gaudi_send_device_activity(struct hl_device *hdev, bool open) 9172 { 9173 return 0; 9174 } 9175 9176 static const struct hl_asic_funcs gaudi_funcs = { 9177 .early_init = gaudi_early_init, 9178 .early_fini = gaudi_early_fini, 9179 .late_init = gaudi_late_init, 9180 .late_fini = gaudi_late_fini, 9181 .sw_init = gaudi_sw_init, 9182 .sw_fini = gaudi_sw_fini, 9183 .hw_init = gaudi_hw_init, 9184 .hw_fini = gaudi_hw_fini, 9185 .halt_engines = gaudi_halt_engines, 9186 .suspend = gaudi_suspend, 9187 .resume = gaudi_resume, 9188 .mmap = gaudi_mmap, 9189 .ring_doorbell = gaudi_ring_doorbell, 9190 .pqe_write = gaudi_pqe_write, 9191 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent, 9192 .asic_dma_free_coherent = gaudi_dma_free_coherent, 9193 .scrub_device_mem = gaudi_scrub_device_mem, 9194 .scrub_device_dram = gaudi_scrub_device_dram, 9195 .get_int_queue_base = gaudi_get_int_queue_base, 9196 .test_queues = 
gaudi_test_queues, 9197 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc, 9198 .asic_dma_pool_free = gaudi_dma_pool_free, 9199 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc, 9200 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free, 9201 .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, 9202 .cs_parser = gaudi_cs_parser, 9203 .asic_dma_map_sgtable = hl_dma_map_sgtable, 9204 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets, 9205 .update_eq_ci = gaudi_update_eq_ci, 9206 .context_switch = gaudi_context_switch, 9207 .restore_phase_topology = gaudi_restore_phase_topology, 9208 .debugfs_read_dma = gaudi_debugfs_read_dma, 9209 .add_device_attr = gaudi_add_device_attr, 9210 .handle_eqe = gaudi_handle_eqe, 9211 .get_events_stat = gaudi_get_events_stat, 9212 .read_pte = gaudi_read_pte, 9213 .write_pte = gaudi_write_pte, 9214 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, 9215 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, 9216 .mmu_prefetch_cache_range = NULL, 9217 .send_heartbeat = gaudi_send_heartbeat, 9218 .debug_coresight = gaudi_debug_coresight, 9219 .is_device_idle = gaudi_is_device_idle, 9220 .compute_reset_late_init = gaudi_compute_reset_late_init, 9221 .hw_queues_lock = gaudi_hw_queues_lock, 9222 .hw_queues_unlock = gaudi_hw_queues_unlock, 9223 .get_pci_id = gaudi_get_pci_id, 9224 .get_eeprom_data = gaudi_get_eeprom_data, 9225 .get_monitor_dump = gaudi_get_monitor_dump, 9226 .send_cpu_message = gaudi_send_cpu_message, 9227 .pci_bars_map = gaudi_pci_bars_map, 9228 .init_iatu = gaudi_init_iatu, 9229 .rreg = hl_rreg, 9230 .wreg = hl_wreg, 9231 .halt_coresight = gaudi_halt_coresight, 9232 .ctx_init = gaudi_ctx_init, 9233 .ctx_fini = gaudi_ctx_fini, 9234 .pre_schedule_cs = gaudi_pre_schedule_cs, 9235 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq, 9236 .load_firmware_to_device = gaudi_load_firmware_to_device, 9237 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device, 9238 .get_signal_cb_size = 
gaudi_get_signal_cb_size, 9239 .get_wait_cb_size = gaudi_get_wait_cb_size, 9240 .gen_signal_cb = gaudi_gen_signal_cb, 9241 .gen_wait_cb = gaudi_gen_wait_cb, 9242 .reset_sob = gaudi_reset_sob, 9243 .reset_sob_group = gaudi_reset_sob_group, 9244 .get_device_time = gaudi_get_device_time, 9245 .pb_print_security_errors = NULL, 9246 .collective_wait_init_cs = gaudi_collective_wait_init_cs, 9247 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs, 9248 .get_dec_base_addr = NULL, 9249 .scramble_addr = hl_mmu_scramble_addr, 9250 .descramble_addr = hl_mmu_descramble_addr, 9251 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors, 9252 .get_hw_block_id = gaudi_get_hw_block_id, 9253 .hw_block_mmap = gaudi_block_mmap, 9254 .enable_events_from_fw = gaudi_enable_events_from_fw, 9255 .ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error, 9256 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx, 9257 .init_firmware_preload_params = gaudi_init_firmware_preload_params, 9258 .init_firmware_loader = gaudi_init_firmware_loader, 9259 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm, 9260 .state_dump_init = gaudi_state_dump_init, 9261 .get_sob_addr = gaudi_get_sob_addr, 9262 .set_pci_memory_regions = gaudi_set_pci_memory_regions, 9263 .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr, 9264 .check_if_razwi_happened = gaudi_check_if_razwi_happened, 9265 .mmu_get_real_page_size = hl_mmu_get_real_page_size, 9266 .access_dev_mem = hl_access_dev_mem, 9267 .set_dram_bar_base = gaudi_set_hbm_bar_base, 9268 .send_device_activity = gaudi_send_device_activity, 9269 .set_dram_properties = gaudi_set_dram_properties, 9270 .set_binning_masks = gaudi_set_binning_masks, 9271 }; 9272 9273 /** 9274 * gaudi_set_asic_funcs - set GAUDI function pointers 9275 * 9276 * @hdev: pointer to hl_device structure 9277 * 9278 */ 9279 void gaudi_set_asic_funcs(struct hl_device *hdev) 9280 { 9281 hdev->asic_funcs = &gaudi_funcs; 9282 } 9283