// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMAN):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 *     PQ is secured and is located on the Host (HBM CON TPC3 bug)
 *     CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0xEE6b27FF /* 8 seconds */
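/*
 * Editor's note on the magic value above: 0xEE6b27FF is roughly 4 * 10^9
 * arbiter cycles. Combined with the "8 seconds" figure in the comment, this
 * implies the arbitration watchdog counts at about 500 MHz. That clock rate
 * is an inference from the comment, not a value taken from the H/W spec.
 */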
#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
	"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
	"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
	"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
	"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
	gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
	gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
	gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};

static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};

static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};
static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
								u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

		if (prop->fw_security_enabled)
			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
		else
			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
						get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective.
	 * 2 monitors per external queue stream are reserved for collective.
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	/* PMMU and HPMMU are the same except for page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if ((hdev->fw_components & FW_TYPE_LINUX) &&
			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be
	 * aligned to 8KB
	 */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
	release_firmware(fw);
	return rc;
}
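/*
 * Collective wait overview (summarized from the comments in the functions
 * below): each QMAN stream reserves a group of SOBs. The NIC engines plus a
 * single reduction engine (DMA5 or TPC7) act as collective slaves that
 * signal those SOBs, while an external (PCI DMA) queue acts as the
 * collective master and waits on them with two monitors - one covering
 * NICs 0-7 and one covering NICs 8-9 together with the reduction engine.
 * SOB groups are rotated per stream when their value wraps around.
 */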
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);

	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
			gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before start going over the jobs
		 * of the master/slaves, the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job, depending on each
		 * job's wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevents the hw_sob refcount value,
	 * which is changed by the signal/wait flows, from going
	 * out of sync.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}
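/*
 * Worked example for the sizing logic below (illustrative values, assuming a
 * 128-byte DEVICE_CACHE_LINE_SIZE and a 16-byte packet_msg_prot): a 112-byte
 * user CB rounds up to a 128-byte cache line; since 112 + 32 > 128, the
 * patched CB gets 16 bytes of padding up to the cache line boundary plus 32
 * bytes for the two MSG_PROT packets, i.e. 48 extra bytes. A 64-byte user CB
 * fits the two packets within its cache line and only gets the 32 extra
 * bytes.
 */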
static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 cacheline_end, additional_commands;

	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
	additional_commands = sizeof(struct packet_msg_prot) * 2;

	if (user_cb_size + additional_commands > cacheline_end)
		return cacheline_end - user_cb_size + additional_commands;
	else
		return additional_commands;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since it's guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * Rest of the jobs go to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
The device CPU works with 40-bit addresses, and bit 39 must be set 1705 * to '1' when accessing the host. 1706 * Bits 49:39 of the full host address are saved for a later 1707 * configuration of the HW, which extends the address to 50 bits. 1708 * Because there is a single HW register that holds the extension bits, 1709 * these bits must be identical across the entire allocated range. 1710 */ 1711 1712 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 1713 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 1714 &dma_addr_arr[i], 1715 GFP_KERNEL | __GFP_ZERO); 1716 if (!virt_addr_arr[i]) { 1717 rc = -ENOMEM; 1718 goto free_dma_mem_arr; 1719 } 1720 1721 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 1722 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == 1723 GAUDI_CPU_PCI_MSB_ADDR(end_addr)) 1724 break; 1725 } 1726 1727 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { 1728 dev_err(hdev->dev, 1729 "MSB of CPU accessible DMA memory is not identical across the entire range\n"); 1730 rc = -EFAULT; 1731 goto free_dma_mem_arr; 1732 } 1733 1734 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 1735 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 1736 hdev->cpu_pci_msb_addr = 1737 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); 1738 1739 if (!hdev->asic_prop.fw_security_enabled) 1740 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); 1741 1742 free_dma_mem_arr: 1743 for (j = 0 ; j < i ; j++) 1744 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 1745 dma_addr_arr[j]); 1746 1747 return rc; 1748 } 1749 1750 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev) 1751 { 1752 struct gaudi_device *gaudi = hdev->asic_specific; 1753 struct gaudi_internal_qman_info *q; 1754 u32 i; 1755 1756 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1757 q = &gaudi->internal_qmans[i]; 1758 if (!q->pq_kernel_addr) 1759 continue; 1760 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr); 1761 } 1762 } 1763 1764 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev) 1765 { 1766 struct gaudi_device *gaudi = hdev->asic_specific; 1767 struct gaudi_internal_qman_info *q; 1768 int rc, i; 1769 1770 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1771 if (gaudi_queue_type[i] != QUEUE_TYPE_INT) 1772 continue; 1773 1774 q = &gaudi->internal_qmans[i]; 1775 1776 switch (i) { 1777 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3: 1778 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES; 1779 break; 1780 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3: 1781 q->pq_size = MME_QMAN_SIZE_IN_BYTES; 1782 break; 1783 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3: 1784 q->pq_size = TPC_QMAN_SIZE_IN_BYTES; 1785 break; 1786 case GAUDI_QUEUE_ID_NIC_0_0 ...
GAUDI_QUEUE_ID_NIC_9_3: 1787 q->pq_size = NIC_QMAN_SIZE_IN_BYTES; 1788 break; 1789 default: 1790 dev_err(hdev->dev, "Bad internal queue index %d", i); 1791 rc = -EINVAL; 1792 goto free_internal_qmans_pq_mem; 1793 } 1794 1795 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr, 1796 GFP_KERNEL | __GFP_ZERO); 1797 if (!q->pq_kernel_addr) { 1798 rc = -ENOMEM; 1799 goto free_internal_qmans_pq_mem; 1800 } 1801 } 1802 1803 return 0; 1804 1805 free_internal_qmans_pq_mem: 1806 gaudi_free_internal_qmans_pq_mem(hdev); 1807 return rc; 1808 } 1809 1810 static void gaudi_set_pci_memory_regions(struct hl_device *hdev) 1811 { 1812 struct asic_fixed_properties *prop = &hdev->asic_prop; 1813 struct pci_mem_region *region; 1814 1815 /* CFG */ 1816 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 1817 region->region_base = CFG_BASE; 1818 region->region_size = CFG_SIZE; 1819 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR; 1820 region->bar_size = CFG_BAR_SIZE; 1821 region->bar_id = CFG_BAR_ID; 1822 region->used = 1; 1823 1824 /* SRAM */ 1825 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 1826 region->region_base = SRAM_BASE_ADDR; 1827 region->region_size = SRAM_SIZE; 1828 region->offset_in_bar = 0; 1829 region->bar_size = SRAM_BAR_SIZE; 1830 region->bar_id = SRAM_BAR_ID; 1831 region->used = 1; 1832 1833 /* DRAM */ 1834 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 1835 region->region_base = DRAM_PHYS_BASE; 1836 region->region_size = hdev->asic_prop.dram_size; 1837 region->offset_in_bar = 0; 1838 region->bar_size = prop->dram_pci_bar_size; 1839 region->bar_id = HBM_BAR_ID; 1840 region->used = 1; 1841 1842 /* SP SRAM */ 1843 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM]; 1844 region->region_base = PSOC_SCRATCHPAD_ADDR; 1845 region->region_size = PSOC_SCRATCHPAD_SIZE; 1846 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR; 1847 region->bar_size = CFG_BAR_SIZE; 1848 region->bar_id = CFG_BAR_ID; 1849 region->used = 1; 1850 } 1851 1852 static int gaudi_sw_init(struct hl_device *hdev) 1853 { 1854 struct gaudi_device *gaudi; 1855 u32 i, event_id = 0; 1856 int rc; 1857 1858 /* Allocate device structure */ 1859 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 1860 if (!gaudi) 1861 return -ENOMEM; 1862 1863 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 1864 if (gaudi_irq_map_table[i].valid) { 1865 if (event_id == GAUDI_EVENT_SIZE) { 1866 dev_err(hdev->dev, 1867 "Event array exceeds the limit of %u events\n", 1868 GAUDI_EVENT_SIZE); 1869 rc = -EINVAL; 1870 goto free_gaudi_device; 1871 } 1872 1873 gaudi->events[event_id++] = 1874 gaudi_irq_map_table[i].fc_id; 1875 } 1876 } 1877 1878 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1879 1880 hdev->asic_specific = gaudi; 1881 1882 /* Create DMA pool for small allocations */ 1883 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1884 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1885 if (!hdev->dma_pool) { 1886 dev_err(hdev->dev, "failed to create DMA pool\n"); 1887 rc = -ENOMEM; 1888 goto free_gaudi_device; 1889 } 1890 1891 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1892 if (rc) 1893 goto free_dma_pool; 1894 1895 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1896 if (!hdev->cpu_accessible_dma_pool) { 1897 dev_err(hdev->dev, 1898 "Failed to create CPU accessible DMA pool\n"); 1899 rc = -ENOMEM; 1900 goto free_cpu_dma_mem; 1901 } 1902 1903 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1904 (uintptr_t) hdev->cpu_accessible_dma_mem, 1905 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1906 if 
(rc) { 1907 dev_err(hdev->dev, 1908 "Failed to add memory to CPU accessible DMA pool\n"); 1909 rc = -EFAULT; 1910 goto free_cpu_accessible_dma_pool; 1911 } 1912 1913 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1914 if (rc) 1915 goto free_cpu_accessible_dma_pool; 1916 1917 spin_lock_init(&gaudi->hw_queues_lock); 1918 1919 hdev->supports_sync_stream = true; 1920 hdev->supports_coresight = true; 1921 hdev->supports_staged_submission = true; 1922 hdev->supports_wait_for_multi_cs = true; 1923 1924 hdev->asic_funcs->set_pci_memory_regions(hdev); 1925 hdev->stream_master_qid_arr = 1926 hdev->asic_funcs->get_stream_master_qid_arr(); 1927 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; 1928 1929 return 0; 1930 1931 free_cpu_accessible_dma_pool: 1932 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1933 free_cpu_dma_mem: 1934 if (!hdev->asic_prop.fw_security_enabled) 1935 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1936 hdev->cpu_pci_msb_addr); 1937 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1938 hdev->cpu_accessible_dma_address); 1939 free_dma_pool: 1940 dma_pool_destroy(hdev->dma_pool); 1941 free_gaudi_device: 1942 kfree(gaudi); 1943 return rc; 1944 } 1945 1946 static int gaudi_sw_fini(struct hl_device *hdev) 1947 { 1948 struct gaudi_device *gaudi = hdev->asic_specific; 1949 1950 gaudi_free_internal_qmans_pq_mem(hdev); 1951 1952 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1953 1954 if (!hdev->asic_prop.fw_security_enabled) 1955 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1956 hdev->cpu_pci_msb_addr); 1957 1958 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1959 hdev->cpu_accessible_dma_address); 1960 1961 dma_pool_destroy(hdev->dma_pool); 1962 1963 kfree(gaudi); 1964 1965 return 0; 1966 } 1967 1968 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1969 { 1970 struct hl_device *hdev = arg; 1971 int i; 1972 1973 if (hdev->disabled) 1974 return IRQ_HANDLED; 1975 1976 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1977 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1978 1979 hl_irq_handler_eq(irq, &hdev->event_queue); 1980 1981 return IRQ_HANDLED; 1982 } 1983 1984 /* 1985 * For backward compatibility, new MSI interrupts should be set after the 1986 * existing CPU and NIC interrupts. 1987 */ 1988 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1989 bool cpu_eq) 1990 { 1991 int msi_vec; 1992 1993 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1994 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1995 GAUDI_EVENT_QUEUE_MSI_IDX); 1996 1997 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? 
nr : 1998 (nr + NIC_NUMBER_OF_ENGINES + 1); 1999 2000 return pci_irq_vector(hdev->pdev, msi_vec); 2001 } 2002 2003 static int gaudi_enable_msi_single(struct hl_device *hdev) 2004 { 2005 int rc, irq; 2006 2007 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n"); 2008 2009 irq = gaudi_pci_irq_vector(hdev, 0, false); 2010 rc = request_irq(irq, gaudi_irq_handler_single, 0, 2011 "gaudi single msi", hdev); 2012 if (rc) 2013 dev_err(hdev->dev, 2014 "Failed to request single MSI IRQ\n"); 2015 2016 return rc; 2017 } 2018 2019 static int gaudi_enable_msi_multi(struct hl_device *hdev) 2020 { 2021 int cq_cnt = hdev->asic_prop.completion_queues_count; 2022 int rc, i, irq_cnt_init, irq; 2023 2024 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) { 2025 irq = gaudi_pci_irq_vector(hdev, i, false); 2026 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i], 2027 &hdev->completion_queue[i]); 2028 if (rc) { 2029 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 2030 goto free_irqs; 2031 } 2032 } 2033 2034 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true); 2035 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt], 2036 &hdev->event_queue); 2037 if (rc) { 2038 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 2039 goto free_irqs; 2040 } 2041 2042 return 0; 2043 2044 free_irqs: 2045 for (i = 0 ; i < irq_cnt_init ; i++) 2046 free_irq(gaudi_pci_irq_vector(hdev, i, false), 2047 &hdev->completion_queue[i]); 2048 return rc; 2049 } 2050 2051 static int gaudi_enable_msi(struct hl_device *hdev) 2052 { 2053 struct gaudi_device *gaudi = hdev->asic_specific; 2054 int rc; 2055 2056 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 2057 return 0; 2058 2059 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); 2060 if (rc < 0) { 2061 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 2062 return rc; 2063 } 2064 2065 if (rc < NUMBER_OF_INTERRUPTS) { 2066 gaudi->multi_msi_mode = false; 2067 rc = gaudi_enable_msi_single(hdev); 2068 } else { 2069 gaudi->multi_msi_mode = true; 2070 rc = gaudi_enable_msi_multi(hdev); 2071 } 2072 2073 if (rc) 2074 goto free_pci_irq_vectors; 2075 2076 gaudi->hw_cap_initialized |= HW_CAP_MSI; 2077 2078 return 0; 2079 2080 free_pci_irq_vectors: 2081 pci_free_irq_vectors(hdev->pdev); 2082 return rc; 2083 } 2084 2085 static void gaudi_sync_irqs(struct hl_device *hdev) 2086 { 2087 struct gaudi_device *gaudi = hdev->asic_specific; 2088 int i, cq_cnt = hdev->asic_prop.completion_queues_count; 2089 2090 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2091 return; 2092 2093 /* Wait for all pending IRQs to be finished */ 2094 if (gaudi->multi_msi_mode) { 2095 for (i = 0 ; i < cq_cnt ; i++) 2096 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false)); 2097 2098 synchronize_irq(gaudi_pci_irq_vector(hdev, 2099 GAUDI_EVENT_QUEUE_MSI_IDX, 2100 true)); 2101 } else { 2102 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 2103 } 2104 } 2105 2106 static void gaudi_disable_msi(struct hl_device *hdev) 2107 { 2108 struct gaudi_device *gaudi = hdev->asic_specific; 2109 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count; 2110 2111 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2112 return; 2113 2114 gaudi_sync_irqs(hdev); 2115 2116 if (gaudi->multi_msi_mode) { 2117 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, 2118 true); 2119 free_irq(irq, &hdev->event_queue); 2120 2121 for (i = 0 ; i < cq_cnt ; i++) { 2122 irq = gaudi_pci_irq_vector(hdev, i, false); 2123 free_irq(irq, &hdev->completion_queue[i]); 2124 } 2125 } else { 2126 
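/* Single MSI mode: the lone vector was requested with hdev itself as the
 * cookie in gaudi_enable_msi_single(), so it is freed with the same
 * argument here.
 */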
free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 2127 } 2128 2129 pci_free_irq_vectors(hdev->pdev); 2130 2131 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 2132 } 2133 2134 static void gaudi_init_scrambler_sram(struct hl_device *hdev) 2135 { 2136 struct gaudi_device *gaudi = hdev->asic_specific; 2137 2138 if (hdev->asic_prop.fw_security_enabled) 2139 return; 2140 2141 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 2142 CPU_BOOT_DEV_STS0_SRAM_SCR_EN) 2143 return; 2144 2145 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 2146 return; 2147 2148 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2149 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2150 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2151 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2152 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2153 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2154 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2155 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2156 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2157 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2158 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2159 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2160 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2161 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2162 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2163 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2164 2165 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2166 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2167 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2168 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2169 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2170 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2171 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2172 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2173 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2174 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2175 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2176 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2177 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2178 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2179 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2180 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2181 2182 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 2183 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2184 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 2185 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2186 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 2187 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2188 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 2189 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2190 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 2191 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2192 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 2193 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2194 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 2195 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2196 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 2197 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2198 2199 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 2200 } 2201 2202 static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 2203 { 2204 struct gaudi_device *gaudi = hdev->asic_specific; 2205 2206 if (hdev->asic_prop.fw_security_enabled) 2207 return; 2208 2209 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2210 CPU_BOOT_DEV_STS0_DRAM_SCR_EN) 2211 return; 2212 2213 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 2214 return; 2215 2216 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 2217 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2218 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 2219 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2220 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 2221 1 << 
IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2222 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 2223 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2224 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 2225 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2226 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 2227 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2228 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 2229 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2230 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 2231 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2232 2233 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 2234 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2235 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 2236 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2237 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 2238 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2239 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 2240 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2241 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 2242 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2243 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 2244 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2245 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 2246 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2247 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 2248 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2249 2250 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 2251 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2252 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 2253 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2254 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 2255 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2256 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 2257 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2258 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 2259 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2260 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 2261 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2262 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 2263 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2264 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 2265 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2266 2267 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 2268 } 2269 2270 static void gaudi_init_e2e(struct hl_device *hdev) 2271 { 2272 if (hdev->asic_prop.fw_security_enabled) 2273 return; 2274 2275 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2276 CPU_BOOT_DEV_STS0_E2E_CRED_EN) 2277 return; 2278 2279 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 2280 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 2281 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 2282 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 2283 2284 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2285 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2286 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2287 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2288 2289 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2290 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2291 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2292 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2293 2294 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2295 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2296 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2297 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2298 2299 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2300 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2301 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2302 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2303 2304 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2305 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2306 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 
1); 2307 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2308 2309 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2310 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2311 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2312 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2313 2314 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 2315 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 2316 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 2317 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 2318 2319 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 2320 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 2321 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 2322 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 2323 2324 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2325 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2326 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2327 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2328 2329 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2330 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2331 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2332 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2333 2334 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2335 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2336 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2337 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2338 2339 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2340 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2341 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2342 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2343 2344 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2345 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2346 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2347 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2348 2349 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2350 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2351 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2352 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2353 2354 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 2355 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 2356 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 2357 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 2358 2359 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2360 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2361 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2362 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2363 2364 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2365 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2366 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2367 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2368 2369 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2370 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2371 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2372 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2373 2374 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2375 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2376 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2377 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2378 2379 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2380 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2381 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2382 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2383 2384 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2385 
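/* Note: the HBM WR/RD credit sizes above and below are written divided by
 * 8 (the ">> 3"), while the PCI credit sizes are written unscaled.
 */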
WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2386 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2387 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2388 2389 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2390 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2391 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2392 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2393 2394 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2395 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2396 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2397 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2398 2399 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 2400 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2401 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 2402 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2403 2404 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 2405 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2406 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 2407 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2408 2409 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 2410 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2411 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 2412 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2413 2414 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 2415 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2416 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 2417 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2418 2419 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 2420 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2421 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 2422 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2423 2424 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 2425 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2426 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 2427 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2428 2429 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 2430 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2431 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 2432 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2433 2434 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 2435 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2436 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 2437 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2438 2439 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 2440 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2441 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 2442 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2443 2444 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 2445 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2446 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 2447 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2448 2449 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 2450 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2451 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 2452 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2453 2454 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 2455 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2456 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 2457 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2458 2459 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 2460 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2461 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 2462 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2463 2464 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 2465 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2466 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 2467 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2468 2469 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 2470 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2471 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 2472 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2473 2474 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 2475 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2476 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 2477 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2478 2479 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 2480 1 << 
DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2481 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 2482 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2483 2484 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 2485 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2486 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 2487 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2488 2489 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 2490 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2491 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 2492 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2493 2494 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 2495 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2496 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 2497 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2498 2499 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 2500 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2501 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 2502 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2503 2504 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 2505 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2506 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 2507 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2508 2509 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, 2510 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2511 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, 2512 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2513 2514 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, 2515 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2516 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, 2517 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2518 } 2519 2520 static void gaudi_init_hbm_cred(struct hl_device *hdev) 2521 { 2522 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; 2523 2524 if (hdev->asic_prop.fw_security_enabled) 2525 return; 2526 2527 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2528 CPU_BOOT_DEV_STS0_HBM_CRED_EN) 2529 return; 2530 2531 hbm0_wr = 0x33333333; 2532 hbm0_rd = 0x77777777; 2533 hbm1_wr = 0x55555555; 2534 hbm1_rd = 0xDDDDDDDD; 2535 2536 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); 2537 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); 2538 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); 2539 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); 2540 2541 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); 2542 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); 2543 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); 2544 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); 2545 2546 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); 2547 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); 2548 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); 2549 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); 2550 2551 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); 2552 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); 2553 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); 2554 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); 2555 2556 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, 2557 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2558 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2559 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, 2560 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2561 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2562 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, 2563 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2564 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2565 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, 2566 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2567 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2568 2569 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, 2570 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2571 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2572 
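/* Same read/write credit enable value for the _EN_1 registers of the
 * remaining DMA_IF instances (E_S, W_N, W_S).
 */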
WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, 2573 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2574 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2575 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, 2576 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2577 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2578 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, 2579 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2580 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2581 } 2582 2583 static void gaudi_init_golden_registers(struct hl_device *hdev) 2584 { 2585 u32 tpc_offset; 2586 int tpc_id, i; 2587 2588 gaudi_init_e2e(hdev); 2589 gaudi_init_hbm_cred(hdev); 2590 2591 for (tpc_id = 0, tpc_offset = 0; 2592 tpc_id < TPC_NUMBER_OF_ENGINES; 2593 tpc_id++, tpc_offset += TPC_CFG_OFFSET) { 2594 /* Mask all arithmetic interrupts from TPC */ 2595 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); 2596 /* Set 16 cache lines */ 2597 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, 2598 ICACHE_FETCH_LINE_NUM, 2); 2599 } 2600 2601 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ 2602 for (i = 0 ; i < 128 ; i += 8) 2603 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); 2604 2605 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2606 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2607 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2608 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2609 } 2610 2611 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 2612 int qman_id, dma_addr_t qman_pq_addr) 2613 { 2614 struct cpu_dyn_regs *dyn_regs = 2615 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2616 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2617 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2618 u32 q_off, dma_qm_offset; 2619 u32 dma_qm_err_cfg, irq_handler_offset; 2620 2621 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2622 2623 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2624 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2625 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2626 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2627 so_base_en_lo = lower_32_bits(CFG_BASE + 2628 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2629 so_base_en_hi = upper_32_bits(CFG_BASE + 2630 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2631 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2632 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2633 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2634 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2635 so_base_ws_lo = lower_32_bits(CFG_BASE + 2636 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2637 so_base_ws_hi = upper_32_bits(CFG_BASE + 2638 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2639 2640 q_off = dma_qm_offset + qman_id * 4; 2641 2642 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 2643 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 2644 2645 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 2646 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2647 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2648 2649 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 2650 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2651 QMAN_LDMA_SRC_OFFSET); 2652 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2653 QMAN_LDMA_DST_OFFSET); 2654 2655 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2656 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2657 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2658 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2659 
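/* MSG_BASE0/1 above take the _en_ (SYNC_MNGR_E_N) monitor payload and SOB
 * base addresses; MSG_BASE2/3 below take the _ws_ (SYNC_MNGR_W_S) ones.
 */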
WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 2660 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 2661 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 2662 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 2663 2664 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 2665 2666 /* The following configuration is needed only once per QMAN */ 2667 if (qman_id == 0) { 2668 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2669 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2670 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2671 2672 /* Configure RAZWI IRQ */ 2673 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2674 if (hdev->stop_on_err) 2675 dma_qm_err_cfg |= 2676 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2677 2678 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2679 2680 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2681 lower_32_bits(CFG_BASE + irq_handler_offset)); 2682 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2683 upper_32_bits(CFG_BASE + irq_handler_offset)); 2684 2685 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2686 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2687 dma_id); 2688 2689 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2690 QM_ARB_ERR_MSG_EN_MASK); 2691 2692 /* Set timeout to maximum */ 2693 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2694 2695 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2696 QMAN_EXTERNAL_MAKE_TRUSTED); 2697 2698 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2699 } 2700 } 2701 2702 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) 2703 { 2704 struct cpu_dyn_regs *dyn_regs = 2705 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2706 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; 2707 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 2708 u32 irq_handler_offset; 2709 2710 /* Set to maximum possible according to physical size */ 2711 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); 2712 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); 2713 2714 /* WA for H/W bug H3-2116 */ 2715 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15); 2716 2717 /* STOP_ON bit implies no completion to operation in case of RAZWI */ 2718 if (hdev->stop_on_err) 2719 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; 2720 2721 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); 2722 2723 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2724 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2725 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 2726 2727 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, 2728 lower_32_bits(CFG_BASE + irq_handler_offset)); 2729 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, 2730 upper_32_bits(CFG_BASE + irq_handler_offset)); 2731 2732 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, 2733 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); 2734 WREG32(mmDMA0_CORE_PROT + dma_offset, 2735 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); 2736 /* If the channel is secured, it should be in MMU bypass mode */ 2737 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, 2738 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); 2739 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); 2740 } 2741 2742 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, 2743 u32 enable_mask) 2744 { 2745 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2746 2747 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); 2748 } 2749 2750 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) 2751 { 2752 struct gaudi_device *gaudi = hdev->asic_specific; 2753 struct hl_hw_queue *q; 2754 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; 2755 2756 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) 2757 return; 2758 2759 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2760 dma_id = gaudi_dma_assignment[i]; 2761 /* 2762 * For queues after the CPU Q need to add 1 to get the correct 2763 * queue. In addition, need to add the CPU EQ and NIC IRQs in 2764 * order to get the correct MSI register. 2765 */ 2766 if (dma_id > 1) { 2767 cpu_skip = 1; 2768 nic_skip = NIC_NUMBER_OF_ENGINES; 2769 } else { 2770 cpu_skip = 0; 2771 nic_skip = 0; 2772 } 2773 2774 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2775 q_idx = 4 * dma_id + j + cpu_skip; 2776 q = &hdev->kernel_queues[q_idx]; 2777 q->cq_id = cq_id++; 2778 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 2779 gaudi_init_pci_dma_qman(hdev, dma_id, j, 2780 q->bus_address); 2781 } 2782 2783 gaudi_init_dma_core(hdev, dma_id); 2784 2785 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 2786 } 2787 2788 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2789 } 2790 2791 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2792 int qman_id, u64 qman_base_addr) 2793 { 2794 struct cpu_dyn_regs *dyn_regs = 2795 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2796 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2797 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2798 u32 dma_qm_err_cfg, irq_handler_offset; 2799 u32 q_off, dma_qm_offset; 2800 2801 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2802 2803 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2804 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2805 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2806 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2807 so_base_en_lo = lower_32_bits(CFG_BASE + 2808 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2809 so_base_en_hi = upper_32_bits(CFG_BASE + 2810 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2811 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2812 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2813 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2814 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2815 so_base_ws_lo = lower_32_bits(CFG_BASE + 2816 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2817 so_base_ws_hi = upper_32_bits(CFG_BASE + 2818 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2819 2820 q_off = dma_qm_offset + qman_id * 4; 2821 2822 if (qman_id < 4) { 2823 
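/* Streams 0-3 (upper CPs) get a PQ backed by the buffer allocated in
 * gaudi_alloc_internal_qmans_pq_mem(); the lower CP (qman_id 4) is
 * configured in the else branch below and has no PQ of its own.
 */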
WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, 2824 lower_32_bits(qman_base_addr)); 2825 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, 2826 upper_32_bits(qman_base_addr)); 2827 2828 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); 2829 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2830 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2831 2832 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2833 QMAN_CPDMA_SIZE_OFFSET); 2834 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2835 QMAN_CPDMA_SRC_OFFSET); 2836 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2837 QMAN_CPDMA_DST_OFFSET); 2838 } else { 2839 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2840 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2841 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2842 2843 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2844 QMAN_LDMA_SIZE_OFFSET); 2845 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2846 QMAN_LDMA_SRC_OFFSET); 2847 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2848 QMAN_LDMA_DST_OFFSET); 2849 2850 /* Configure RAZWI IRQ */ 2851 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2852 if (hdev->stop_on_err) 2853 dma_qm_err_cfg |= 2854 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2855 2856 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2857 2858 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2859 lower_32_bits(CFG_BASE + irq_handler_offset)); 2860 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2861 upper_32_bits(CFG_BASE + irq_handler_offset)); 2862 2863 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2864 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2865 dma_id); 2866 2867 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2868 QM_ARB_ERR_MSG_EN_MASK); 2869 2870 /* Set timeout to maximum */ 2871 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2872 2873 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2874 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2875 QMAN_INTERNAL_MAKE_TRUSTED); 2876 } 2877 2878 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2879 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2880 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2881 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2882 2883 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */ 2884 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) { 2885 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 2886 mtr_base_ws_lo); 2887 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 2888 mtr_base_ws_hi); 2889 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 2890 so_base_ws_lo); 2891 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 2892 so_base_ws_hi); 2893 } 2894 } 2895 2896 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) 2897 { 2898 struct gaudi_device *gaudi = hdev->asic_specific; 2899 struct gaudi_internal_qman_info *q; 2900 u64 qman_base_addr; 2901 int i, j, dma_id, internal_q_index; 2902 2903 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) 2904 return; 2905 2906 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 2907 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; 2908 2909 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2910 /* 2911 * Add the CPU queue in order to get the correct queue 2912 * number as all internal queue are placed after it 2913 */ 2914 internal_q_index = dma_id * QMAN_STREAMS + j + 1; 2915 2916 q = &gaudi->internal_qmans[internal_q_index]; 2917 qman_base_addr = (u64) q->pq_dma_addr; 2918 gaudi_init_hbm_dma_qman(hdev, dma_id, j, 
2919 qman_base_addr); 2920 } 2921 2922 /* Initializing lower CP for HBM DMA QMAN */ 2923 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); 2924 2925 gaudi_init_dma_core(hdev, dma_id); 2926 2927 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); 2928 } 2929 2930 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; 2931 } 2932 2933 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, 2934 int qman_id, u64 qman_base_addr) 2935 { 2936 struct cpu_dyn_regs *dyn_regs = 2937 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2938 u32 mtr_base_lo, mtr_base_hi; 2939 u32 so_base_lo, so_base_hi; 2940 u32 irq_handler_offset; 2941 u32 q_off, mme_id; 2942 u32 mme_qm_err_cfg; 2943 2944 mtr_base_lo = lower_32_bits(CFG_BASE + 2945 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2946 mtr_base_hi = upper_32_bits(CFG_BASE + 2947 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2948 so_base_lo = lower_32_bits(CFG_BASE + 2949 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2950 so_base_hi = upper_32_bits(CFG_BASE + 2951 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2952 2953 q_off = mme_offset + qman_id * 4; 2954 2955 if (qman_id < 4) { 2956 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, 2957 lower_32_bits(qman_base_addr)); 2958 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, 2959 upper_32_bits(qman_base_addr)); 2960 2961 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); 2962 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); 2963 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); 2964 2965 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2966 QMAN_CPDMA_SIZE_OFFSET); 2967 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2968 QMAN_CPDMA_SRC_OFFSET); 2969 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2970 QMAN_CPDMA_DST_OFFSET); 2971 } else { 2972 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2973 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2974 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 2975 2976 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2977 QMAN_LDMA_SIZE_OFFSET); 2978 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2979 QMAN_LDMA_SRC_OFFSET); 2980 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2981 QMAN_LDMA_DST_OFFSET); 2982 2983 /* Configure RAZWI IRQ */ 2984 mme_id = mme_offset / 2985 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2; 2986 2987 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2988 if (hdev->stop_on_err) 2989 mme_qm_err_cfg |= 2990 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2991 2992 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); 2993 2994 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, 2995 lower_32_bits(CFG_BASE + irq_handler_offset)); 2996 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, 2997 upper_32_bits(CFG_BASE + irq_handler_offset)); 2998 2999 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, 3000 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + 3001 mme_id); 3002 3003 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, 3004 QM_ARB_ERR_MSG_EN_MASK); 3005 3006 /* Set timeout to maximum */ 3007 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); 3008 3009 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); 3010 WREG32(mmMME0_QM_GLBL_PROT + mme_offset, 3011 QMAN_INTERNAL_MAKE_TRUSTED); 3012 } 3013 3014 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 3015 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 3016 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 3017 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 3018 } 3019 3020 static void gaudi_init_mme_qmans(struct hl_device *hdev) 3021 { 3022 struct gaudi_device *gaudi = hdev->asic_specific; 3023 struct gaudi_internal_qman_info *q; 3024 u64 qman_base_addr; 3025 u32 mme_offset; 3026 int i, internal_q_index; 3027 3028 if (gaudi->hw_cap_initialized & HW_CAP_MME) 3029 return; 3030 3031 /* 3032 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) 3033 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) 3034 */ 3035 3036 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 3037 3038 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { 3039 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; 3040 q = &gaudi->internal_qmans[internal_q_index]; 3041 qman_base_addr = (u64) q->pq_dma_addr; 3042 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), 3043 qman_base_addr); 3044 if (i == 3) 3045 mme_offset = 0; 3046 } 3047 3048 /* Initializing lower CP for MME QMANs */ 3049 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 3050 gaudi_init_mme_qman(hdev, mme_offset, 4, 0); 3051 gaudi_init_mme_qman(hdev, 0, 4, 0); 3052 3053 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); 3054 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); 3055 3056 gaudi->hw_cap_initialized |= HW_CAP_MME; 3057 } 3058 3059 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, 3060 int qman_id, u64 qman_base_addr) 3061 { 3062 struct cpu_dyn_regs *dyn_regs = 3063 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3064 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3065 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3066 u32 tpc_qm_err_cfg, irq_handler_offset; 3067 u32 q_off, tpc_id; 3068 3069 mtr_base_en_lo = lower_32_bits(CFG_BASE + 3070 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3071 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3072 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3073 
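/* The _en_ bases (SYNC_MNGR_E_N) feed MSG_BASE0/1 for all TPC QMANs; the
 * _ws_ bases (SYNC_MNGR_W_S) computed below feed MSG_BASE2/3 and are only
 * programmed for the collective TPC (the tpc_id == 6 / TPC7 case further
 * down).
 */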
so_base_en_lo = lower_32_bits(CFG_BASE + 3074 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3075 so_base_en_hi = upper_32_bits(CFG_BASE + 3076 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3077 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 3078 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3079 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3080 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3081 so_base_ws_lo = lower_32_bits(CFG_BASE + 3082 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3083 so_base_ws_hi = upper_32_bits(CFG_BASE + 3084 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3085 3086 q_off = tpc_offset + qman_id * 4; 3087 3088 tpc_id = tpc_offset / 3089 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); 3090 3091 if (qman_id < 4) { 3092 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, 3093 lower_32_bits(qman_base_addr)); 3094 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, 3095 upper_32_bits(qman_base_addr)); 3096 3097 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); 3098 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); 3099 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); 3100 3101 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3102 QMAN_CPDMA_SIZE_OFFSET); 3103 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3104 QMAN_CPDMA_SRC_OFFSET); 3105 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3106 QMAN_CPDMA_DST_OFFSET); 3107 } else { 3108 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 3109 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3110 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 3111 3112 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3113 QMAN_LDMA_SIZE_OFFSET); 3114 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3115 QMAN_LDMA_SRC_OFFSET); 3116 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3117 QMAN_LDMA_DST_OFFSET); 3118 3119 /* Configure RAZWI IRQ */ 3120 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3121 if (hdev->stop_on_err) 3122 tpc_qm_err_cfg |= 3123 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3124 3125 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); 3126 3127 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, 3128 lower_32_bits(CFG_BASE + irq_handler_offset)); 3129 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, 3130 upper_32_bits(CFG_BASE + irq_handler_offset)); 3131 3132 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, 3133 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + 3134 tpc_id); 3135 3136 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, 3137 QM_ARB_ERR_MSG_EN_MASK); 3138 3139 /* Set timeout to maximum */ 3140 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); 3141 3142 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); 3143 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, 3144 QMAN_INTERNAL_MAKE_TRUSTED); 3145 } 3146 3147 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3148 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3149 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3150 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3151 3152 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */ 3153 if (tpc_id == 6) { 3154 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 3155 mtr_base_ws_lo); 3156 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 3157 mtr_base_ws_hi); 3158 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 3159 so_base_ws_lo); 3160 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 3161 so_base_ws_hi); 3162 } 3163 } 3164 3165 static void gaudi_init_tpc_qmans(struct hl_device *hdev) 3166 { 3167 struct gaudi_device *gaudi = hdev->asic_specific; 3168 struct 
gaudi_internal_qman_info *q; 3169 u64 qman_base_addr; 3170 u32 so_base_hi, tpc_offset = 0; 3171 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - 3172 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; 3173 int i, tpc_id, internal_q_index; 3174 3175 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) 3176 return; 3177 3178 so_base_hi = upper_32_bits(CFG_BASE + 3179 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3180 3181 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3182 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3183 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + 3184 tpc_id * QMAN_STREAMS + i; 3185 q = &gaudi->internal_qmans[internal_q_index]; 3186 qman_base_addr = (u64) q->pq_dma_addr; 3187 gaudi_init_tpc_qman(hdev, tpc_offset, i, 3188 qman_base_addr); 3189 3190 if (i == 3) { 3191 /* Initializing lower CP for TPC QMAN */ 3192 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); 3193 3194 /* Enable the QMAN and TPC channel */ 3195 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 3196 QMAN_TPC_ENABLE); 3197 } 3198 } 3199 3200 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, 3201 so_base_hi); 3202 3203 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3204 3205 gaudi->hw_cap_initialized |= 3206 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id); 3207 } 3208 } 3209 3210 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, 3211 int qman_id, u64 qman_base_addr, int nic_id) 3212 { 3213 struct cpu_dyn_regs *dyn_regs = 3214 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3215 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3216 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3217 u32 nic_qm_err_cfg, irq_handler_offset; 3218 u32 q_off; 3219 3220 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3221 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3222 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3223 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3224 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3225 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3226 so_base_en_hi = upper_32_bits(CFG_BASE + 3227 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3228 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3229 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3230 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3231 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3232 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3233 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3234 so_base_ws_hi = upper_32_bits(CFG_BASE + 3235 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3236 3237 q_off = nic_offset + qman_id * 4; 3238 3239 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr)); 3240 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr)); 3241 3242 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH)); 3243 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0); 3244 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0); 3245 3246 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3247 QMAN_LDMA_SIZE_OFFSET); 3248 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3249 QMAN_LDMA_SRC_OFFSET); 3250 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3251 QMAN_LDMA_DST_OFFSET); 3252 3253 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3254 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3255 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3256 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3257 3258 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */ 3259 
WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 3260 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 3261 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 3262 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 3263 3264 if (qman_id == 0) { 3265 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 3266 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3267 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 3268 3269 /* Configure RAZWI IRQ */ 3270 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3271 if (hdev->stop_on_err) 3272 nic_qm_err_cfg |= 3273 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3274 3275 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg); 3276 3277 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset, 3278 lower_32_bits(CFG_BASE + irq_handler_offset)); 3279 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset, 3280 upper_32_bits(CFG_BASE + irq_handler_offset)); 3281 3282 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset, 3283 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id + 3284 nic_id); 3285 3286 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, 3287 QM_ARB_ERR_MSG_EN_MASK); 3288 3289 /* Set timeout to maximum */ 3290 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); 3291 3292 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); 3293 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, 3294 QMAN_INTERNAL_MAKE_TRUSTED); 3295 } 3296 } 3297 3298 static void gaudi_init_nic_qmans(struct hl_device *hdev) 3299 { 3300 struct gaudi_device *gaudi = hdev->asic_specific; 3301 struct gaudi_internal_qman_info *q; 3302 u64 qman_base_addr; 3303 u32 nic_offset = 0; 3304 u32 nic_delta_between_qmans = 3305 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3306 u32 nic_delta_between_nics = 3307 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3308 int i, nic_id, internal_q_index; 3309 3310 if (!hdev->nic_ports_mask) 3311 return; 3312 3313 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK) 3314 return; 3315 3316 dev_dbg(hdev->dev, "Initializing NIC QMANs\n"); 3317 3318 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3319 if (!(hdev->nic_ports_mask & (1 << nic_id))) { 3320 nic_offset += nic_delta_between_qmans; 3321 if (nic_id & 1) { 3322 nic_offset -= (nic_delta_between_qmans * 2); 3323 nic_offset += nic_delta_between_nics; 3324 } 3325 continue; 3326 } 3327 3328 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3329 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 + 3330 nic_id * QMAN_STREAMS + i; 3331 q = &gaudi->internal_qmans[internal_q_index]; 3332 qman_base_addr = (u64) q->pq_dma_addr; 3333 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3), 3334 qman_base_addr, nic_id); 3335 } 3336 3337 /* Enable the QMAN */ 3338 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE); 3339 3340 nic_offset += nic_delta_between_qmans; 3341 if (nic_id & 1) { 3342 nic_offset -= (nic_delta_between_qmans * 2); 3343 nic_offset += nic_delta_between_nics; 3344 } 3345 3346 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id); 3347 } 3348 } 3349 3350 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) 3351 { 3352 struct gaudi_device *gaudi = hdev->asic_specific; 3353 3354 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3355 return; 3356 3357 WREG32(mmDMA0_QM_GLBL_CFG0, 0); 3358 WREG32(mmDMA1_QM_GLBL_CFG0, 0); 3359 WREG32(mmDMA5_QM_GLBL_CFG0, 0); 3360 } 3361 3362 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) 3363 { 3364 struct gaudi_device *gaudi = hdev->asic_specific; 3365 3366 if (!(gaudi->hw_cap_initialized & 
HW_CAP_HBM_DMA)) 3367 return; 3368 3369 WREG32(mmDMA2_QM_GLBL_CFG0, 0); 3370 WREG32(mmDMA3_QM_GLBL_CFG0, 0); 3371 WREG32(mmDMA4_QM_GLBL_CFG0, 0); 3372 WREG32(mmDMA6_QM_GLBL_CFG0, 0); 3373 WREG32(mmDMA7_QM_GLBL_CFG0, 0); 3374 } 3375 3376 static void gaudi_disable_mme_qmans(struct hl_device *hdev) 3377 { 3378 struct gaudi_device *gaudi = hdev->asic_specific; 3379 3380 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3381 return; 3382 3383 WREG32(mmMME2_QM_GLBL_CFG0, 0); 3384 WREG32(mmMME0_QM_GLBL_CFG0, 0); 3385 } 3386 3387 static void gaudi_disable_tpc_qmans(struct hl_device *hdev) 3388 { 3389 struct gaudi_device *gaudi = hdev->asic_specific; 3390 u32 tpc_offset = 0; 3391 int tpc_id; 3392 3393 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3394 return; 3395 3396 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3397 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); 3398 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3399 } 3400 } 3401 3402 static void gaudi_disable_nic_qmans(struct hl_device *hdev) 3403 { 3404 struct gaudi_device *gaudi = hdev->asic_specific; 3405 u32 nic_mask, nic_offset = 0; 3406 u32 nic_delta_between_qmans = 3407 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3408 u32 nic_delta_between_nics = 3409 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3410 int nic_id; 3411 3412 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3413 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id); 3414 3415 if (gaudi->hw_cap_initialized & nic_mask) 3416 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0); 3417 3418 nic_offset += nic_delta_between_qmans; 3419 if (nic_id & 1) { 3420 nic_offset -= (nic_delta_between_qmans * 2); 3421 nic_offset += nic_delta_between_nics; 3422 } 3423 } 3424 } 3425 3426 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) 3427 { 3428 struct gaudi_device *gaudi = hdev->asic_specific; 3429 3430 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3431 return; 3432 3433 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ 3434 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3435 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3436 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3437 } 3438 3439 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) 3440 { 3441 struct gaudi_device *gaudi = hdev->asic_specific; 3442 3443 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3444 return; 3445 3446 /* Stop CPs of HBM DMA QMANs */ 3447 3448 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3449 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3450 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3451 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3452 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3453 } 3454 3455 static void gaudi_stop_mme_qmans(struct hl_device *hdev) 3456 { 3457 struct gaudi_device *gaudi = hdev->asic_specific; 3458 3459 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3460 return; 3461 3462 /* Stop CPs of MME QMANs */ 3463 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3464 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3465 } 3466 3467 static void gaudi_stop_tpc_qmans(struct hl_device *hdev) 3468 { 3469 struct gaudi_device *gaudi = hdev->asic_specific; 3470 3471 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3472 return; 3473 3474 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3475 
WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3476 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3477 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3478 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3479 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3480 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3481 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3482 } 3483 3484 static void gaudi_stop_nic_qmans(struct hl_device *hdev) 3485 { 3486 struct gaudi_device *gaudi = hdev->asic_specific; 3487 3488 /* Stop upper CPs of QMANs */ 3489 3490 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) 3491 WREG32(mmNIC0_QM0_GLBL_CFG1, 3492 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3493 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3494 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3495 3496 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) 3497 WREG32(mmNIC0_QM1_GLBL_CFG1, 3498 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3499 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3500 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3501 3502 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) 3503 WREG32(mmNIC1_QM0_GLBL_CFG1, 3504 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3505 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3506 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3507 3508 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) 3509 WREG32(mmNIC1_QM1_GLBL_CFG1, 3510 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3511 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3512 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3513 3514 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) 3515 WREG32(mmNIC2_QM0_GLBL_CFG1, 3516 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3517 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3518 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3519 3520 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) 3521 WREG32(mmNIC2_QM1_GLBL_CFG1, 3522 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3523 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3524 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3525 3526 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) 3527 WREG32(mmNIC3_QM0_GLBL_CFG1, 3528 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3529 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3530 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3531 3532 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) 3533 WREG32(mmNIC3_QM1_GLBL_CFG1, 3534 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3535 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3536 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3537 3538 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) 3539 WREG32(mmNIC4_QM0_GLBL_CFG1, 3540 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3541 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3542 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3543 3544 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) 3545 WREG32(mmNIC4_QM1_GLBL_CFG1, 3546 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3547 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3548 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3549 } 3550 3551 static void gaudi_pci_dma_stall(struct hl_device *hdev) 3552 { 3553 struct gaudi_device *gaudi = hdev->asic_specific; 3554 3555 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3556 return; 3557 3558 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3559 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3560 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3561 } 3562 3563 static void gaudi_hbm_dma_stall(struct hl_device *hdev) 3564 { 3565 struct gaudi_device *gaudi = hdev->asic_specific; 3566 3567 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3568 return; 3569 3570 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3571 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3572 
WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3573 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3574 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3575 } 3576 3577 static void gaudi_mme_stall(struct hl_device *hdev) 3578 { 3579 struct gaudi_device *gaudi = hdev->asic_specific; 3580 3581 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3582 return; 3583 3584 /* WA for H3-1800 bug: do ACC and SBAB writes twice */ 3585 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3586 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3587 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3588 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3589 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3590 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3591 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3592 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3593 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3594 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3595 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3596 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3597 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3598 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3599 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3600 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3601 } 3602 3603 static void gaudi_tpc_stall(struct hl_device *hdev) 3604 { 3605 struct gaudi_device *gaudi = hdev->asic_specific; 3606 3607 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3608 return; 3609 3610 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3611 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3612 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3613 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3614 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3615 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3616 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3617 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3618 } 3619 3620 static void gaudi_disable_clock_gating(struct hl_device *hdev) 3621 { 3622 u32 qman_offset; 3623 int i; 3624 3625 if (hdev->asic_prop.fw_security_enabled) 3626 return; 3627 3628 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 3629 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); 3630 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); 3631 3632 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); 3633 } 3634 3635 WREG32(mmMME0_QM_CGM_CFG, 0); 3636 WREG32(mmMME0_QM_CGM_CFG1, 0); 3637 WREG32(mmMME2_QM_CGM_CFG, 0); 3638 WREG32(mmMME2_QM_CGM_CFG1, 0); 3639 3640 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 3641 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); 3642 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); 3643 3644 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); 3645 } 3646 } 3647 3648 static void gaudi_enable_timestamp(struct hl_device *hdev) 3649 { 3650 /* Disable the timestamp counter */ 3651 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3652 3653 /* Zero the lower/upper parts of the 64-bit counter */ 3654 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); 3655 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); 3656 3657 /* Enable the counter */ 3658 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); 3659 } 
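/*
 * The timestamp enable/disable helpers assume the following PSOC timestamp
 * block layout (offsets relative to mmPSOC_TIMESTAMP_BASE): 0x0 is the
 * enable/control word, and 0x8/0xC hold the two 32-bit halves of the
 * free-running 64-bit counter. The counter is stopped before the halves are
 * cleared, so it restarts from zero once re-enabled.
 *
 * A minimal read-back sketch, assuming the counter halves are also exposed
 * through mmPSOC_TIMESTAMP_CNTCVL/CNTCVU (these register names are an
 * assumption here, not dictated by the code above):
 *
 *	u64 ts = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
 *	ts |= RREG32(mmPSOC_TIMESTAMP_CNTCVL);
 *
 * A strict reader would re-read the high half and retry if it changed
 * between the two accesses.
 */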
3660 3661 static void gaudi_disable_timestamp(struct hl_device *hdev) 3662 { 3663 /* Disable the timestamp counter */ 3664 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3665 } 3666 3667 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3668 { 3669 u32 wait_timeout_ms; 3670 3671 if (hdev->pldm) 3672 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 3673 else 3674 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; 3675 3676 if (fw_reset) 3677 goto skip_engines; 3678 3679 gaudi_stop_nic_qmans(hdev); 3680 gaudi_stop_mme_qmans(hdev); 3681 gaudi_stop_tpc_qmans(hdev); 3682 gaudi_stop_hbm_dma_qmans(hdev); 3683 gaudi_stop_pci_dma_qmans(hdev); 3684 3685 msleep(wait_timeout_ms); 3686 3687 gaudi_pci_dma_stall(hdev); 3688 gaudi_hbm_dma_stall(hdev); 3689 gaudi_tpc_stall(hdev); 3690 gaudi_mme_stall(hdev); 3691 3692 msleep(wait_timeout_ms); 3693 3694 gaudi_disable_nic_qmans(hdev); 3695 gaudi_disable_mme_qmans(hdev); 3696 gaudi_disable_tpc_qmans(hdev); 3697 gaudi_disable_hbm_dma_qmans(hdev); 3698 gaudi_disable_pci_dma_qmans(hdev); 3699 3700 gaudi_disable_timestamp(hdev); 3701 3702 skip_engines: 3703 gaudi_disable_msi(hdev); 3704 } 3705 3706 static int gaudi_mmu_init(struct hl_device *hdev) 3707 { 3708 struct asic_fixed_properties *prop = &hdev->asic_prop; 3709 struct gaudi_device *gaudi = hdev->asic_specific; 3710 u64 hop0_addr; 3711 int rc, i; 3712 3713 if (!hdev->mmu_enable) 3714 return 0; 3715 3716 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 3717 return 0; 3718 3719 for (i = 0 ; i < prop->max_asid ; i++) { 3720 hop0_addr = prop->mmu_pgt_addr + 3721 (i * prop->mmu_hop_table_size); 3722 3723 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); 3724 if (rc) { 3725 dev_err(hdev->dev, 3726 "failed to set hop0 addr for asid %d\n", i); 3727 goto err; 3728 } 3729 } 3730 3731 /* init MMU cache manage page */ 3732 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8); 3733 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40); 3734 3735 /* mem cache invalidation */ 3736 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); 3737 3738 hl_mmu_invalidate_cache(hdev, true, 0); 3739 3740 WREG32(mmMMU_UP_MMU_ENABLE, 1); 3741 WREG32(mmMMU_UP_SPI_MASK, 0xF); 3742 3743 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440); 3744 3745 /* 3746 * The H/W expects the first PI after init to be 1. After wraparound 3747 * we'll write 0. 
3748 */ 3749 gaudi->mmu_cache_inv_pi = 1; 3750 3751 gaudi->hw_cap_initialized |= HW_CAP_MMU; 3752 3753 return 0; 3754 3755 err: 3756 return rc; 3757 } 3758 3759 static int gaudi_load_firmware_to_device(struct hl_device *hdev) 3760 { 3761 void __iomem *dst; 3762 3763 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET; 3764 3765 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0); 3766 } 3767 3768 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev) 3769 { 3770 void __iomem *dst; 3771 3772 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET; 3773 3774 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0); 3775 } 3776 3777 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev) 3778 { 3779 struct dynamic_fw_load_mgr *dynamic_loader; 3780 struct cpu_dyn_regs *dyn_regs; 3781 3782 dynamic_loader = &hdev->fw_loader.dynamic_loader; 3783 3784 /* 3785 * Set initial values for a few specific dynamic regs here, because 3786 * before the first descriptor is read from the FW these values must 3787 * be hard-coded. In later stages of the protocol they are updated 3788 * automatically by reading the FW descriptor, so the data there is 3789 * always up-to-date. 3790 */ 3791 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; 3792 dyn_regs->kmd_msg_to_cpu = 3793 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); 3794 dyn_regs->cpu_cmd_status_to_host = 3795 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); 3796 3797 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC; 3798 } 3799 3800 static void gaudi_init_static_firmware_loader(struct hl_device *hdev) 3801 { 3802 struct static_fw_load_mgr *static_loader; 3803 3804 static_loader = &hdev->fw_loader.static_loader; 3805 3806 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3807 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3808 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU; 3809 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST; 3810 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3811 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0; 3812 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1; 3813 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0; 3814 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1; 3815 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET; 3816 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET; 3817 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR)); 3818 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3819 GAUDI_PLDM_RESET_WAIT_MSEC : 3820 GAUDI_CPU_RESET_WAIT_MSEC; 3821 } 3822 3823 static void gaudi_init_firmware_preload_params(struct hl_device *hdev) 3824 { 3825 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 3826 3827 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3828 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 3829 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 3830 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 3831 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 3832 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3833 } 3834 3835 static void gaudi_init_firmware_loader(struct hl_device *hdev) 3836 { 3837 struct asic_fixed_properties *prop = &hdev->asic_prop; 3838 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 3839 3840 /* fill common fields */ 3841 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 3842 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE; 3843 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE; 3844 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC; 3845 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3846 fw_loader->skip_bmc = !hdev->bmc_enable; 3847 fw_loader->sram_bar_id = SRAM_BAR_ID; 3848 fw_loader->dram_bar_id = HBM_BAR_ID; 3849 3850 if (prop->dynamic_fw_load) 3851 gaudi_init_dynamic_firmware_loader(hdev); 3852 else 3853 gaudi_init_static_firmware_loader(hdev); 3854 } 3855 3856 static int gaudi_init_cpu(struct hl_device *hdev) 3857 { 3858 struct gaudi_device *gaudi = hdev->asic_specific; 3859 int rc; 3860 3861 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 3862 return 0; 3863 3864 if (gaudi->hw_cap_initialized & HW_CAP_CPU) 3865 return 0; 3866 3867 /* 3868 * The device CPU works with 40 bits addresses. 3869 * This register sets the extension to 50 bits. 
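 * (hdev->cpu_pci_msb_addr carries the address bits above bit 39 that the
 * CPU-IF prepends to the 40-bit addresses issued by the device CPU,
 * typically derived from the CPU-accessible host memory the driver
 * allocates for it. When FW security is enabled this privileged register
 * is not written by the driver; the firmware is assumed to configure the
 * extension itself, so the write below is skipped.)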
3870 */ 3871 if (!hdev->asic_prop.fw_security_enabled) 3872 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 3873 3874 rc = hl_fw_init_cpu(hdev); 3875 3876 if (rc) 3877 return rc; 3878 3879 gaudi->hw_cap_initialized |= HW_CAP_CPU; 3880 3881 return 0; 3882 } 3883 3884 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3885 { 3886 struct cpu_dyn_regs *dyn_regs = 3887 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3888 struct asic_fixed_properties *prop = &hdev->asic_prop; 3889 struct gaudi_device *gaudi = hdev->asic_specific; 3890 u32 status, irq_handler_offset; 3891 struct hl_eq *eq; 3892 struct hl_hw_queue *cpu_pq = 3893 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 3894 int err; 3895 3896 if (!hdev->cpu_queues_enable) 3897 return 0; 3898 3899 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 3900 return 0; 3901 3902 eq = &hdev->event_queue; 3903 3904 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 3905 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 3906 3907 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 3908 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 3909 3910 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 3911 lower_32_bits(hdev->cpu_accessible_dma_address)); 3912 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 3913 upper_32_bits(hdev->cpu_accessible_dma_address)); 3914 3915 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 3916 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 3917 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 3918 3919 /* Used for EQ CI */ 3920 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 3921 3922 WREG32(mmCPU_IF_PF_PQ_PI, 0); 3923 3924 if (gaudi->multi_msi_mode) 3925 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); 3926 else 3927 WREG32(mmCPU_IF_QUEUE_INIT, 3928 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 3929 3930 irq_handler_offset = prop->gic_interrupts_enable ? 3931 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3932 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 3933 3934 WREG32(irq_handler_offset, 3935 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 3936 3937 err = hl_poll_timeout( 3938 hdev, 3939 mmCPU_IF_QUEUE_INIT, 3940 status, 3941 (status == PQ_INIT_STATUS_READY_FOR_HOST), 3942 1000, 3943 cpu_timeout); 3944 3945 if (err) { 3946 dev_err(hdev->dev, 3947 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 3948 return -EIO; 3949 } 3950 3951 /* update FW application security bits */ 3952 if (prop->fw_cpu_boot_dev_sts0_valid) 3953 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 3954 if (prop->fw_cpu_boot_dev_sts1_valid) 3955 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 3956 3957 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 3958 return 0; 3959 } 3960 3961 static void gaudi_pre_hw_init(struct hl_device *hdev) 3962 { 3963 /* Perform read from the device to make sure device is up */ 3964 RREG32(mmHW_STATE); 3965 3966 if (!hdev->asic_prop.fw_security_enabled) { 3967 /* Set the access through PCI bars (Linux driver only) as 3968 * secured 3969 */ 3970 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 3971 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 3972 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 3973 3974 /* Perform read to flush the waiting writes to ensure 3975 * configuration was set in the device 3976 */ 3977 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 3978 } 3979 3980 /* 3981 * Let's mark in the H/W that we have reached this point. We check 3982 * this value in the reset_before_init function to understand whether 3983 * we need to reset the chip before doing H/W init. 
This register is 3984 * cleared by the H/W upon H/W reset 3985 */ 3986 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 3987 } 3988 3989 static int gaudi_hw_init(struct hl_device *hdev) 3990 { 3991 struct gaudi_device *gaudi = hdev->asic_specific; 3992 int rc; 3993 3994 gaudi_pre_hw_init(hdev); 3995 3996 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 3997 * So we set it here and if anyone tries to move it later to 3998 * a different address, there will be an error 3999 */ 4000 if (hdev->asic_prop.iatu_done_by_fw) 4001 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE; 4002 4003 /* 4004 * Before pushing u-boot/linux to device, need to set the hbm bar to 4005 * base address of dram 4006 */ 4007 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 4008 dev_err(hdev->dev, 4009 "failed to map HBM bar to DRAM base address\n"); 4010 return -EIO; 4011 } 4012 4013 rc = gaudi_init_cpu(hdev); 4014 if (rc) { 4015 dev_err(hdev->dev, "failed to initialize CPU\n"); 4016 return rc; 4017 } 4018 4019 /* In case the clock gating was enabled in preboot we need to disable 4020 * it here before touching the MME/TPC registers. 4021 */ 4022 gaudi_disable_clock_gating(hdev); 4023 4024 /* SRAM scrambler must be initialized after CPU is running from HBM */ 4025 gaudi_init_scrambler_sram(hdev); 4026 4027 /* This is here just in case we are working without CPU */ 4028 gaudi_init_scrambler_hbm(hdev); 4029 4030 gaudi_init_golden_registers(hdev); 4031 4032 rc = gaudi_mmu_init(hdev); 4033 if (rc) 4034 return rc; 4035 4036 gaudi_init_security(hdev); 4037 4038 gaudi_init_pci_dma_qmans(hdev); 4039 4040 gaudi_init_hbm_dma_qmans(hdev); 4041 4042 gaudi_init_mme_qmans(hdev); 4043 4044 gaudi_init_tpc_qmans(hdev); 4045 4046 gaudi_init_nic_qmans(hdev); 4047 4048 gaudi_enable_timestamp(hdev); 4049 4050 /* MSI must be enabled before CPU queues and NIC are initialized */ 4051 rc = gaudi_enable_msi(hdev); 4052 if (rc) 4053 goto disable_queues; 4054 4055 /* must be called after MSI was enabled */ 4056 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 4057 if (rc) { 4058 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 4059 rc); 4060 goto disable_msi; 4061 } 4062 4063 /* Perform read from the device to flush all configuration */ 4064 RREG32(mmHW_STATE); 4065 4066 return 0; 4067 4068 disable_msi: 4069 gaudi_disable_msi(hdev); 4070 disable_queues: 4071 gaudi_disable_mme_qmans(hdev); 4072 gaudi_disable_pci_dma_qmans(hdev); 4073 4074 return rc; 4075 } 4076 4077 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4078 { 4079 struct cpu_dyn_regs *dyn_regs = 4080 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4081 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset; 4082 struct gaudi_device *gaudi = hdev->asic_specific; 4083 bool driver_performs_reset; 4084 4085 if (!hard_reset) { 4086 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 4087 return 0; 4088 } 4089 4090 if (hdev->pldm) { 4091 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 4092 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 4093 } else { 4094 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; 4095 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 4096 } 4097 4098 if (fw_reset) { 4099 dev_dbg(hdev->dev, 4100 "Firmware performs HARD reset, going to wait %dms\n", 4101 reset_timeout_ms); 4102 4103 goto skip_reset; 4104 } 4105 4106 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && 4107 !hdev->asic_prop.hard_reset_done_by_fw); 4108 4109 /* Set device to handle FLR by H/W as we 
will put the device CPU to 4110 * halt mode 4111 */ 4112 if (driver_performs_reset) 4113 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 4114 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 4115 4116 /* If linux is loaded in the device CPU we need to communicate with it 4117 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU 4118 * registers in case of old F/Ws 4119 */ 4120 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { 4121 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4122 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4123 le32_to_cpu(dyn_regs->gic_host_halt_irq); 4124 4125 WREG32(irq_handler_offset, 4126 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); 4127 4128 /* This is a hail-mary attempt to revive the card in the small chance that the 4129 * f/w has experienced a watchdog event, which caused it to return back to preboot. 4130 * In that case, triggering reset through GIC won't help. We need to trigger the 4131 * reset as if Linux wasn't loaded. 4132 * 4133 * We do it only if the reset cause was HB, because that would be the indication 4134 * of such an event. 4135 * 4136 * In case watchdog hasn't expired but we still got HB, then this won't do any 4137 * damage. 4138 */ 4139 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { 4140 if (hdev->asic_prop.hard_reset_done_by_fw) 4141 hl_fw_ask_hard_reset_without_linux(hdev); 4142 else 4143 hl_fw_ask_halt_machine_without_linux(hdev); 4144 } 4145 } else { 4146 if (hdev->asic_prop.hard_reset_done_by_fw) 4147 hl_fw_ask_hard_reset_without_linux(hdev); 4148 else 4149 hl_fw_ask_halt_machine_without_linux(hdev); 4150 } 4151 4152 if (driver_performs_reset) { 4153 4154 /* Configure the reset registers. Must be done as early as 4155 * possible in case we fail during H/W initialization 4156 */ 4157 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 4158 (CFG_RST_H_DMA_MASK | 4159 CFG_RST_H_MME_MASK | 4160 CFG_RST_H_SM_MASK | 4161 CFG_RST_H_TPC_7_MASK)); 4162 4163 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 4164 4165 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 4166 (CFG_RST_H_HBM_MASK | 4167 CFG_RST_H_TPC_7_MASK | 4168 CFG_RST_H_NIC_MASK | 4169 CFG_RST_H_SM_MASK | 4170 CFG_RST_H_DMA_MASK | 4171 CFG_RST_H_MME_MASK | 4172 CFG_RST_H_CPU_MASK | 4173 CFG_RST_H_MMU_MASK)); 4174 4175 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 4176 (CFG_RST_L_IF_MASK | 4177 CFG_RST_L_PSOC_MASK | 4178 CFG_RST_L_TPC_MASK)); 4179 4180 msleep(cpu_timeout_ms); 4181 4182 /* Tell ASIC not to re-initialize PCIe */ 4183 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 4184 4185 /* Restart BTL/BLR upon hard-reset */ 4186 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 4187 4188 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 4189 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 4190 4191 dev_dbg(hdev->dev, 4192 "Issued HARD reset command, going to wait %dms\n", 4193 reset_timeout_ms); 4194 } else { 4195 dev_dbg(hdev->dev, 4196 "Firmware performs HARD reset, going to wait %dms\n", 4197 reset_timeout_ms); 4198 } 4199 4200 skip_reset: 4201 /* 4202 * After hard reset, we can't poll the BTM_FSM register because the PSOC 4203 * itself is in reset. 
Need to wait until the reset is deasserted 4204 */ 4205 msleep(reset_timeout_ms); 4206 4207 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 4208 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) 4209 dev_err(hdev->dev, 4210 "Timeout while waiting for device to reset 0x%x\n", 4211 status); 4212 4213 if (gaudi) { 4214 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | 4215 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | 4216 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | 4217 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | 4218 HW_CAP_HBM_SCRAMBLER); 4219 4220 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 4221 4222 hdev->device_cpu_is_halted = false; 4223 } 4224 return 0; 4225 } 4226 4227 static int gaudi_suspend(struct hl_device *hdev) 4228 { 4229 int rc; 4230 4231 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 4232 if (rc) 4233 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 4234 4235 return rc; 4236 } 4237 4238 static int gaudi_resume(struct hl_device *hdev) 4239 { 4240 return gaudi_init_iatu(hdev); 4241 } 4242 4243 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 4244 void *cpu_addr, dma_addr_t dma_addr, size_t size) 4245 { 4246 int rc; 4247 4248 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4249 VM_DONTCOPY | VM_NORESERVE); 4250 4251 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4252 (dma_addr - HOST_PHYS_BASE), size); 4253 if (rc) 4254 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4255 4256 return rc; 4257 } 4258 4259 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 4260 { 4261 struct cpu_dyn_regs *dyn_regs = 4262 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4263 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset; 4264 struct gaudi_device *gaudi = hdev->asic_specific; 4265 bool invalid_queue = false; 4266 int dma_id; 4267 4268 switch (hw_queue_id) { 4269 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 4270 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 4271 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4272 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4273 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4274 break; 4275 4276 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 4277 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 4278 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4279 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4280 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4281 break; 4282 4283 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 4284 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 4285 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4286 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4287 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4288 break; 4289 4290 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 4291 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 4292 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4293 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4294 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4295 break; 4296 4297 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 4298 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 4299 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4300 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4301 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4302 break; 4303 4304 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 4305 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 4306 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4307 q_off = 
dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4308 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4309 break; 4310 4311 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 4312 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 4313 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4314 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4315 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4316 break; 4317 4318 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 4319 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6]; 4320 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4321 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4322 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4323 break; 4324 4325 case GAUDI_QUEUE_ID_CPU_PQ: 4326 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4327 db_reg_offset = mmCPU_IF_PF_PQ_PI; 4328 else 4329 invalid_queue = true; 4330 break; 4331 4332 case GAUDI_QUEUE_ID_MME_0_0: 4333 db_reg_offset = mmMME2_QM_PQ_PI_0; 4334 break; 4335 4336 case GAUDI_QUEUE_ID_MME_0_1: 4337 db_reg_offset = mmMME2_QM_PQ_PI_1; 4338 break; 4339 4340 case GAUDI_QUEUE_ID_MME_0_2: 4341 db_reg_offset = mmMME2_QM_PQ_PI_2; 4342 break; 4343 4344 case GAUDI_QUEUE_ID_MME_0_3: 4345 db_reg_offset = mmMME2_QM_PQ_PI_3; 4346 break; 4347 4348 case GAUDI_QUEUE_ID_MME_1_0: 4349 db_reg_offset = mmMME0_QM_PQ_PI_0; 4350 break; 4351 4352 case GAUDI_QUEUE_ID_MME_1_1: 4353 db_reg_offset = mmMME0_QM_PQ_PI_1; 4354 break; 4355 4356 case GAUDI_QUEUE_ID_MME_1_2: 4357 db_reg_offset = mmMME0_QM_PQ_PI_2; 4358 break; 4359 4360 case GAUDI_QUEUE_ID_MME_1_3: 4361 db_reg_offset = mmMME0_QM_PQ_PI_3; 4362 break; 4363 4364 case GAUDI_QUEUE_ID_TPC_0_0: 4365 db_reg_offset = mmTPC0_QM_PQ_PI_0; 4366 break; 4367 4368 case GAUDI_QUEUE_ID_TPC_0_1: 4369 db_reg_offset = mmTPC0_QM_PQ_PI_1; 4370 break; 4371 4372 case GAUDI_QUEUE_ID_TPC_0_2: 4373 db_reg_offset = mmTPC0_QM_PQ_PI_2; 4374 break; 4375 4376 case GAUDI_QUEUE_ID_TPC_0_3: 4377 db_reg_offset = mmTPC0_QM_PQ_PI_3; 4378 break; 4379 4380 case GAUDI_QUEUE_ID_TPC_1_0: 4381 db_reg_offset = mmTPC1_QM_PQ_PI_0; 4382 break; 4383 4384 case GAUDI_QUEUE_ID_TPC_1_1: 4385 db_reg_offset = mmTPC1_QM_PQ_PI_1; 4386 break; 4387 4388 case GAUDI_QUEUE_ID_TPC_1_2: 4389 db_reg_offset = mmTPC1_QM_PQ_PI_2; 4390 break; 4391 4392 case GAUDI_QUEUE_ID_TPC_1_3: 4393 db_reg_offset = mmTPC1_QM_PQ_PI_3; 4394 break; 4395 4396 case GAUDI_QUEUE_ID_TPC_2_0: 4397 db_reg_offset = mmTPC2_QM_PQ_PI_0; 4398 break; 4399 4400 case GAUDI_QUEUE_ID_TPC_2_1: 4401 db_reg_offset = mmTPC2_QM_PQ_PI_1; 4402 break; 4403 4404 case GAUDI_QUEUE_ID_TPC_2_2: 4405 db_reg_offset = mmTPC2_QM_PQ_PI_2; 4406 break; 4407 4408 case GAUDI_QUEUE_ID_TPC_2_3: 4409 db_reg_offset = mmTPC2_QM_PQ_PI_3; 4410 break; 4411 4412 case GAUDI_QUEUE_ID_TPC_3_0: 4413 db_reg_offset = mmTPC3_QM_PQ_PI_0; 4414 break; 4415 4416 case GAUDI_QUEUE_ID_TPC_3_1: 4417 db_reg_offset = mmTPC3_QM_PQ_PI_1; 4418 break; 4419 4420 case GAUDI_QUEUE_ID_TPC_3_2: 4421 db_reg_offset = mmTPC3_QM_PQ_PI_2; 4422 break; 4423 4424 case GAUDI_QUEUE_ID_TPC_3_3: 4425 db_reg_offset = mmTPC3_QM_PQ_PI_3; 4426 break; 4427 4428 case GAUDI_QUEUE_ID_TPC_4_0: 4429 db_reg_offset = mmTPC4_QM_PQ_PI_0; 4430 break; 4431 4432 case GAUDI_QUEUE_ID_TPC_4_1: 4433 db_reg_offset = mmTPC4_QM_PQ_PI_1; 4434 break; 4435 4436 case GAUDI_QUEUE_ID_TPC_4_2: 4437 db_reg_offset = mmTPC4_QM_PQ_PI_2; 4438 break; 4439 4440 case GAUDI_QUEUE_ID_TPC_4_3: 4441 db_reg_offset = mmTPC4_QM_PQ_PI_3; 4442 break; 4443 4444 case GAUDI_QUEUE_ID_TPC_5_0: 4445 db_reg_offset = mmTPC5_QM_PQ_PI_0; 4446 break; 4447 4448 case GAUDI_QUEUE_ID_TPC_5_1: 4449 db_reg_offset = 
mmTPC5_QM_PQ_PI_1; 4450 break; 4451 4452 case GAUDI_QUEUE_ID_TPC_5_2: 4453 db_reg_offset = mmTPC5_QM_PQ_PI_2; 4454 break; 4455 4456 case GAUDI_QUEUE_ID_TPC_5_3: 4457 db_reg_offset = mmTPC5_QM_PQ_PI_3; 4458 break; 4459 4460 case GAUDI_QUEUE_ID_TPC_6_0: 4461 db_reg_offset = mmTPC6_QM_PQ_PI_0; 4462 break; 4463 4464 case GAUDI_QUEUE_ID_TPC_6_1: 4465 db_reg_offset = mmTPC6_QM_PQ_PI_1; 4466 break; 4467 4468 case GAUDI_QUEUE_ID_TPC_6_2: 4469 db_reg_offset = mmTPC6_QM_PQ_PI_2; 4470 break; 4471 4472 case GAUDI_QUEUE_ID_TPC_6_3: 4473 db_reg_offset = mmTPC6_QM_PQ_PI_3; 4474 break; 4475 4476 case GAUDI_QUEUE_ID_TPC_7_0: 4477 db_reg_offset = mmTPC7_QM_PQ_PI_0; 4478 break; 4479 4480 case GAUDI_QUEUE_ID_TPC_7_1: 4481 db_reg_offset = mmTPC7_QM_PQ_PI_1; 4482 break; 4483 4484 case GAUDI_QUEUE_ID_TPC_7_2: 4485 db_reg_offset = mmTPC7_QM_PQ_PI_2; 4486 break; 4487 4488 case GAUDI_QUEUE_ID_TPC_7_3: 4489 db_reg_offset = mmTPC7_QM_PQ_PI_3; 4490 break; 4491 4492 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3: 4493 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0)) 4494 invalid_queue = true; 4495 4496 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4497 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off; 4498 break; 4499 4500 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3: 4501 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1)) 4502 invalid_queue = true; 4503 4504 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4505 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off; 4506 break; 4507 4508 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3: 4509 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2)) 4510 invalid_queue = true; 4511 4512 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4513 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off; 4514 break; 4515 4516 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3: 4517 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3)) 4518 invalid_queue = true; 4519 4520 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4521 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off; 4522 break; 4523 4524 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3: 4525 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4)) 4526 invalid_queue = true; 4527 4528 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4529 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off; 4530 break; 4531 4532 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3: 4533 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5)) 4534 invalid_queue = true; 4535 4536 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4537 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off; 4538 break; 4539 4540 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3: 4541 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6)) 4542 invalid_queue = true; 4543 4544 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4545 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off; 4546 break; 4547 4548 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3: 4549 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7)) 4550 invalid_queue = true; 4551 4552 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4553 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off; 4554 break; 4555 4556 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3: 4557 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8)) 4558 invalid_queue = true; 4559 4560 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4561 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off; 4562 break; 4563 4564 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3: 4565 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9)) 4566 invalid_queue = true; 4567 4568 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4569 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off; 4570 break; 4571 4572 default: 4573 invalid_queue = true; 4574 } 4575 4576 if 
(invalid_queue) { 4577 /* Should never get here */ 4578 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", 4579 hw_queue_id); 4580 return; 4581 } 4582 4583 db_value = pi; 4584 4585 /* ring the doorbell */ 4586 WREG32(db_reg_offset, db_value); 4587 4588 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { 4589 /* make sure device CPU will read latest data from host */ 4590 mb(); 4591 4592 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4593 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4594 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 4595 4596 WREG32(irq_handler_offset, 4597 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 4598 } 4599 } 4600 4601 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, 4602 struct hl_bd *bd) 4603 { 4604 __le64 *pbd = (__le64 *) bd; 4605 4606 /* The QMANs are on the host memory so a simple copy suffice */ 4607 pqe[0] = pbd[0]; 4608 pqe[1] = pbd[1]; 4609 } 4610 4611 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, 4612 dma_addr_t *dma_handle, gfp_t flags) 4613 { 4614 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, 4615 dma_handle, flags); 4616 4617 /* Shift to the device's base physical address of host memory */ 4618 if (kernel_addr) 4619 *dma_handle += HOST_PHYS_BASE; 4620 4621 return kernel_addr; 4622 } 4623 4624 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, 4625 void *cpu_addr, dma_addr_t dma_handle) 4626 { 4627 /* Cancel the device's base physical address of host memory */ 4628 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; 4629 4630 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); 4631 } 4632 4633 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) 4634 { 4635 struct asic_fixed_properties *prop = &hdev->asic_prop; 4636 u64 cur_addr = prop->dram_user_base_address; 4637 u32 chunk_size, busy; 4638 int rc, dma_id; 4639 4640 while (cur_addr < prop->dram_end_address) { 4641 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4642 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4643 4644 chunk_size = 4645 min((u64)SZ_2G, prop->dram_end_address - cur_addr); 4646 4647 dev_dbg(hdev->dev, 4648 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", 4649 cur_addr, cur_addr + chunk_size); 4650 4651 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 4652 lower_32_bits(val)); 4653 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 4654 upper_32_bits(val)); 4655 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, 4656 lower_32_bits(cur_addr)); 4657 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, 4658 upper_32_bits(cur_addr)); 4659 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, 4660 chunk_size); 4661 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 4662 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) | 4663 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT))); 4664 4665 cur_addr += chunk_size; 4666 4667 if (cur_addr == prop->dram_end_address) 4668 break; 4669 } 4670 4671 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4672 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4673 4674 rc = hl_poll_timeout( 4675 hdev, 4676 mmDMA0_CORE_STS0 + dma_offset, 4677 busy, 4678 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 4679 1000, 4680 HBM_SCRUBBING_TIMEOUT_US); 4681 4682 if (rc) { 4683 dev_err(hdev->dev, 4684 "DMA Timeout during HBM scrubbing of DMA #%d\n", 4685 dma_id); 4686 return -EIO; 4687 } 4688 } 4689 } 4690 4691 return 0; 4692 } 4693 4694 static int gaudi_scrub_device_mem(struct hl_device *hdev) 4695 { 4696 struct asic_fixed_properties *prop = &hdev->asic_prop; 4697 u64 wait_to_idle_time = 
hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US : 4698 min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US); 4699 u64 addr, size, val = hdev->memory_scrub_val; 4700 ktime_t timeout; 4701 int rc = 0; 4702 4703 if (!hdev->memory_scrub) 4704 return 0; 4705 4706 timeout = ktime_add_us(ktime_get(), wait_to_idle_time); 4707 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 4708 if (ktime_compare(ktime_get(), timeout) > 0) { 4709 dev_err(hdev->dev, "waiting for idle timeout\n"); 4710 return -ETIMEDOUT; 4711 } 4712 usleep_range((1000 >> 2) + 1, 1000); 4713 } 4714 4715 /* Scrub SRAM */ 4716 addr = prop->sram_user_base_address; 4717 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; 4718 4719 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4720 addr, addr + size, val); 4721 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4722 if (rc) { 4723 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc); 4724 return rc; 4725 } 4726 4727 /* Scrub HBM using all DMA channels in parallel */ 4728 rc = gaudi_scrub_device_dram(hdev, val); 4729 if (rc) { 4730 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc); 4731 return rc; 4732 } 4733 4734 return 0; 4735 } 4736 4737 static void *gaudi_get_int_queue_base(struct hl_device *hdev, 4738 u32 queue_id, dma_addr_t *dma_handle, 4739 u16 *queue_len) 4740 { 4741 struct gaudi_device *gaudi = hdev->asic_specific; 4742 struct gaudi_internal_qman_info *q; 4743 4744 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 4745 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 4746 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 4747 return NULL; 4748 } 4749 4750 q = &gaudi->internal_qmans[queue_id]; 4751 *dma_handle = q->pq_dma_addr; 4752 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; 4753 4754 return q->pq_kernel_addr; 4755 } 4756 4757 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 4758 u16 len, u32 timeout, u64 *result) 4759 { 4760 struct gaudi_device *gaudi = hdev->asic_specific; 4761 4762 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 4763 if (result) 4764 *result = 0; 4765 return 0; 4766 } 4767 4768 if (!timeout) 4769 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 4770 4771 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 4772 timeout, result); 4773 } 4774 4775 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 4776 { 4777 struct packet_msg_prot *fence_pkt; 4778 dma_addr_t pkt_dma_addr; 4779 u32 fence_val, tmp, timeout_usec; 4780 dma_addr_t fence_dma_addr; 4781 u32 *fence_ptr; 4782 int rc; 4783 4784 if (hdev->pldm) 4785 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 4786 else 4787 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 4788 4789 fence_val = GAUDI_QMAN0_FENCE_VAL; 4790 4791 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 4792 if (!fence_ptr) { 4793 dev_err(hdev->dev, 4794 "Failed to allocate memory for H/W queue %d testing\n", 4795 hw_queue_id); 4796 return -ENOMEM; 4797 } 4798 4799 *fence_ptr = 0; 4800 4801 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL, 4802 &pkt_dma_addr); 4803 if (!fence_pkt) { 4804 dev_err(hdev->dev, 4805 "Failed to allocate packet for H/W queue %d testing\n", 4806 hw_queue_id); 4807 rc = -ENOMEM; 4808 goto free_fence_ptr; 4809 } 4810 4811 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4812 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4813 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4814 4815 fence_pkt->ctl = cpu_to_le32(tmp); 4816 fence_pkt->value = 
cpu_to_le32(fence_val); 4817 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4818 4819 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 4820 sizeof(struct packet_msg_prot), 4821 pkt_dma_addr); 4822 if (rc) { 4823 dev_err(hdev->dev, 4824 "Failed to send fence packet to H/W queue %d\n", 4825 hw_queue_id); 4826 goto free_pkt; 4827 } 4828 4829 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 4830 1000, timeout_usec, true); 4831 4832 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 4833 4834 if (rc == -ETIMEDOUT) { 4835 dev_err(hdev->dev, 4836 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 4837 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 4838 rc = -EIO; 4839 } 4840 4841 free_pkt: 4842 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr); 4843 free_fence_ptr: 4844 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 4845 return rc; 4846 } 4847 4848 static int gaudi_test_cpu_queue(struct hl_device *hdev) 4849 { 4850 struct gaudi_device *gaudi = hdev->asic_specific; 4851 4852 /* 4853 * check capability here as send_cpu_message() won't update the result 4854 * value if no capability 4855 */ 4856 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 4857 return 0; 4858 4859 return hl_fw_test_cpu_queue(hdev); 4860 } 4861 4862 static int gaudi_test_queues(struct hl_device *hdev) 4863 { 4864 int i, rc, ret_val = 0; 4865 4866 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 4867 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 4868 rc = gaudi_test_queue(hdev, i); 4869 if (rc) 4870 ret_val = -EINVAL; 4871 } 4872 } 4873 4874 rc = gaudi_test_cpu_queue(hdev); 4875 if (rc) 4876 ret_val = -EINVAL; 4877 4878 return ret_val; 4879 } 4880 4881 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 4882 gfp_t mem_flags, dma_addr_t *dma_handle) 4883 { 4884 void *kernel_addr; 4885 4886 if (size > GAUDI_DMA_POOL_BLK_SIZE) 4887 return NULL; 4888 4889 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 4890 4891 /* Shift to the device's base physical address of host memory */ 4892 if (kernel_addr) 4893 *dma_handle += HOST_PHYS_BASE; 4894 4895 return kernel_addr; 4896 } 4897 4898 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 4899 dma_addr_t dma_addr) 4900 { 4901 /* Cancel the device's base physical address of host memory */ 4902 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 4903 4904 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 4905 } 4906 4907 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 4908 size_t size, dma_addr_t *dma_handle) 4909 { 4910 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 4911 } 4912 4913 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 4914 size_t size, void *vaddr) 4915 { 4916 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 4917 } 4918 4919 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) 4920 { 4921 struct scatterlist *sg, *sg_next_iter; 4922 u32 count, dma_desc_cnt; 4923 u64 len, len_next; 4924 dma_addr_t addr, addr_next; 4925 4926 dma_desc_cnt = 0; 4927 4928 for_each_sgtable_dma_sg(sgt, sg, count) { 4929 len = sg_dma_len(sg); 4930 addr = sg_dma_address(sg); 4931 4932 if (len == 0) 4933 break; 4934 4935 while ((count + 1) < sgt->nents) { 4936 sg_next_iter = sg_next(sg); 4937 len_next = sg_dma_len(sg_next_iter); 4938 addr_next = sg_dma_address(sg_next_iter); 4939 4940 if (len_next == 0) 4941 break; 4942 4943 if ((addr + len == 
addr_next) && 4944 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 4945 len += len_next; 4946 count++; 4947 sg = sg_next_iter; 4948 } else { 4949 break; 4950 } 4951 } 4952 4953 dma_desc_cnt++; 4954 } 4955 4956 return dma_desc_cnt * sizeof(struct packet_lin_dma); 4957 } 4958 4959 static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 4960 struct hl_cs_parser *parser, 4961 struct packet_lin_dma *user_dma_pkt, 4962 u64 addr, enum dma_data_direction dir) 4963 { 4964 struct hl_userptr *userptr; 4965 int rc; 4966 4967 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4968 parser->job_userptr_list, &userptr)) 4969 goto already_pinned; 4970 4971 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 4972 if (!userptr) 4973 return -ENOMEM; 4974 4975 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4976 userptr); 4977 if (rc) 4978 goto free_userptr; 4979 4980 list_add_tail(&userptr->job_node, parser->job_userptr_list); 4981 4982 rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); 4983 if (rc) { 4984 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 4985 goto unpin_memory; 4986 } 4987 4988 userptr->dma_mapped = true; 4989 userptr->dir = dir; 4990 4991 already_pinned: 4992 parser->patched_cb_size += 4993 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 4994 4995 return 0; 4996 4997 unpin_memory: 4998 list_del(&userptr->job_node); 4999 hl_unpin_host_memory(hdev, userptr); 5000 free_userptr: 5001 kfree(userptr); 5002 return rc; 5003 } 5004 5005 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, 5006 struct hl_cs_parser *parser, 5007 struct packet_lin_dma *user_dma_pkt, 5008 bool src_in_host) 5009 { 5010 enum dma_data_direction dir; 5011 bool skip_host_mem_pin = false, user_memset; 5012 u64 addr; 5013 int rc = 0; 5014 5015 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 5016 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5017 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5018 5019 if (src_in_host) { 5020 if (user_memset) 5021 skip_host_mem_pin = true; 5022 5023 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 5024 dir = DMA_TO_DEVICE; 5025 addr = le64_to_cpu(user_dma_pkt->src_addr); 5026 } else { 5027 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 5028 dir = DMA_FROM_DEVICE; 5029 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 5030 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 5031 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 5032 } 5033 5034 if (skip_host_mem_pin) 5035 parser->patched_cb_size += sizeof(*user_dma_pkt); 5036 else 5037 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 5038 addr, dir); 5039 5040 return rc; 5041 } 5042 5043 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 5044 struct hl_cs_parser *parser, 5045 struct packet_lin_dma *user_dma_pkt) 5046 { 5047 bool src_in_host = false; 5048 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 5049 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 5050 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 5051 5052 dev_dbg(hdev->dev, "DMA packet details:\n"); 5053 dev_dbg(hdev->dev, "source == 0x%llx\n", 5054 le64_to_cpu(user_dma_pkt->src_addr)); 5055 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 5056 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); 5057 5058 /* 5059 * Special handling for DMA with size 0. 
Bypass all validations 5060 * because no transactions will be done except for WR_COMP, which 5061 * is not a security issue 5062 */ 5063 if (!le32_to_cpu(user_dma_pkt->tsize)) { 5064 parser->patched_cb_size += sizeof(*user_dma_pkt); 5065 return 0; 5066 } 5067 5068 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5069 src_in_host = true; 5070 5071 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 5072 src_in_host); 5073 } 5074 5075 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 5076 struct hl_cs_parser *parser, 5077 struct packet_load_and_exe *user_pkt) 5078 { 5079 u32 cfg; 5080 5081 cfg = le32_to_cpu(user_pkt->cfg); 5082 5083 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 5084 dev_err(hdev->dev, 5085 "User not allowed to use Load and Execute\n"); 5086 return -EPERM; 5087 } 5088 5089 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 5090 5091 return 0; 5092 } 5093 5094 static int gaudi_validate_cb(struct hl_device *hdev, 5095 struct hl_cs_parser *parser, bool is_mmu) 5096 { 5097 u32 cb_parsed_length = 0; 5098 int rc = 0; 5099 5100 parser->patched_cb_size = 0; 5101 5102 /* cb_user_size is more than 0 so loop will always be executed */ 5103 while (cb_parsed_length < parser->user_cb_size) { 5104 enum packet_id pkt_id; 5105 u16 pkt_size; 5106 struct gaudi_packet *user_pkt; 5107 5108 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5109 5110 pkt_id = (enum packet_id) ( 5111 (le64_to_cpu(user_pkt->header) & 5112 PACKET_HEADER_PACKET_ID_MASK) >> 5113 PACKET_HEADER_PACKET_ID_SHIFT); 5114 5115 if (!validate_packet_id(pkt_id)) { 5116 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5117 rc = -EINVAL; 5118 break; 5119 } 5120 5121 pkt_size = gaudi_packet_sizes[pkt_id]; 5122 cb_parsed_length += pkt_size; 5123 if (cb_parsed_length > parser->user_cb_size) { 5124 dev_err(hdev->dev, 5125 "packet 0x%x is out of CB boundary\n", pkt_id); 5126 rc = -EINVAL; 5127 break; 5128 } 5129 5130 switch (pkt_id) { 5131 case PACKET_MSG_PROT: 5132 dev_err(hdev->dev, 5133 "User not allowed to use MSG_PROT\n"); 5134 rc = -EPERM; 5135 break; 5136 5137 case PACKET_CP_DMA: 5138 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5139 rc = -EPERM; 5140 break; 5141 5142 case PACKET_STOP: 5143 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5144 rc = -EPERM; 5145 break; 5146 5147 case PACKET_WREG_BULK: 5148 dev_err(hdev->dev, 5149 "User not allowed to use WREG_BULK\n"); 5150 rc = -EPERM; 5151 break; 5152 5153 case PACKET_LOAD_AND_EXE: 5154 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 5155 (struct packet_load_and_exe *) user_pkt); 5156 break; 5157 5158 case PACKET_LIN_DMA: 5159 parser->contains_dma_pkt = true; 5160 if (is_mmu) 5161 parser->patched_cb_size += pkt_size; 5162 else 5163 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 5164 (struct packet_lin_dma *) user_pkt); 5165 break; 5166 5167 case PACKET_WREG_32: 5168 case PACKET_MSG_LONG: 5169 case PACKET_MSG_SHORT: 5170 case PACKET_REPEAT: 5171 case PACKET_FENCE: 5172 case PACKET_NOP: 5173 case PACKET_ARB_POINT: 5174 parser->patched_cb_size += pkt_size; 5175 break; 5176 5177 default: 5178 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5179 pkt_id); 5180 rc = -EINVAL; 5181 break; 5182 } 5183 5184 if (rc) 5185 break; 5186 } 5187 5188 /* 5189 * The new CB should have space at the end for two MSG_PROT packets: 5190 * 1. Optional NOP padding for cacheline alignment 5191 * 2. A packet that will act as a completion packet 5192 * 3. 
A packet that will generate MSI interrupt 5193 */ 5194 if (parser->completion) 5195 parser->patched_cb_size += gaudi_get_patched_cb_extra_size( 5196 parser->patched_cb_size); 5197 5198 return rc; 5199 } 5200 5201 static int gaudi_patch_dma_packet(struct hl_device *hdev, 5202 struct hl_cs_parser *parser, 5203 struct packet_lin_dma *user_dma_pkt, 5204 struct packet_lin_dma *new_dma_pkt, 5205 u32 *new_dma_pkt_size) 5206 { 5207 struct hl_userptr *userptr; 5208 struct scatterlist *sg, *sg_next_iter; 5209 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 5210 u64 len, len_next; 5211 dma_addr_t dma_addr, dma_addr_next; 5212 u64 device_memory_addr, addr; 5213 enum dma_data_direction dir; 5214 struct sg_table *sgt; 5215 bool src_in_host = false; 5216 bool skip_host_mem_pin = false; 5217 bool user_memset; 5218 5219 ctl = le32_to_cpu(user_dma_pkt->ctl); 5220 5221 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5222 src_in_host = true; 5223 5224 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5225 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5226 5227 if (src_in_host) { 5228 addr = le64_to_cpu(user_dma_pkt->src_addr); 5229 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 5230 dir = DMA_TO_DEVICE; 5231 if (user_memset) 5232 skip_host_mem_pin = true; 5233 } else { 5234 addr = le64_to_cpu(user_dma_pkt->dst_addr); 5235 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 5236 dir = DMA_FROM_DEVICE; 5237 } 5238 5239 if ((!skip_host_mem_pin) && 5240 (!hl_userptr_is_pinned(hdev, addr, 5241 le32_to_cpu(user_dma_pkt->tsize), 5242 parser->job_userptr_list, &userptr))) { 5243 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 5244 addr, user_dma_pkt->tsize); 5245 return -EFAULT; 5246 } 5247 5248 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 5249 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 5250 *new_dma_pkt_size = sizeof(*user_dma_pkt); 5251 return 0; 5252 } 5253 5254 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5255 5256 sgt = userptr->sgt; 5257 dma_desc_cnt = 0; 5258 5259 for_each_sgtable_dma_sg(sgt, sg, count) { 5260 len = sg_dma_len(sg); 5261 dma_addr = sg_dma_address(sg); 5262 5263 if (len == 0) 5264 break; 5265 5266 while ((count + 1) < sgt->nents) { 5267 sg_next_iter = sg_next(sg); 5268 len_next = sg_dma_len(sg_next_iter); 5269 dma_addr_next = sg_dma_address(sg_next_iter); 5270 5271 if (len_next == 0) 5272 break; 5273 5274 if ((dma_addr + len == dma_addr_next) && 5275 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5276 len += len_next; 5277 count++; 5278 sg = sg_next_iter; 5279 } else { 5280 break; 5281 } 5282 } 5283 5284 ctl = le32_to_cpu(user_dma_pkt->ctl); 5285 if (likely(dma_desc_cnt)) 5286 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 5287 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5288 new_dma_pkt->ctl = cpu_to_le32(ctl); 5289 new_dma_pkt->tsize = cpu_to_le32(len); 5290 5291 if (dir == DMA_TO_DEVICE) { 5292 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 5293 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 5294 } else { 5295 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 5296 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 5297 } 5298 5299 if (!user_memset) 5300 device_memory_addr += len; 5301 dma_desc_cnt++; 5302 new_dma_pkt++; 5303 } 5304 5305 if (!dma_desc_cnt) { 5306 dev_err(hdev->dev, 5307 "Error of 0 SG entries when patching DMA packet\n"); 5308 return -EFAULT; 5309 } 5310 5311 /* Fix the last dma packet - wrcomp must be as user set it */ 5312 new_dma_pkt--; 5313 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 5314 5315 
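	/*
	 * Report the patched size back to the caller: one LIN_DMA packet was
	 * emitted per coalesced SG run, matching the size that
	 * gaudi_get_dma_desc_list_size() pre-computed for this userptr during
	 * CB validation, so parser->patched_cb_size stays consistent.
	 */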
*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); 5316 5317 return 0; 5318 } 5319 5320 static int gaudi_patch_cb(struct hl_device *hdev, 5321 struct hl_cs_parser *parser) 5322 { 5323 u32 cb_parsed_length = 0; 5324 u32 cb_patched_cur_length = 0; 5325 int rc = 0; 5326 5327 /* cb_user_size is more than 0 so loop will always be executed */ 5328 while (cb_parsed_length < parser->user_cb_size) { 5329 enum packet_id pkt_id; 5330 u16 pkt_size; 5331 u32 new_pkt_size = 0; 5332 struct gaudi_packet *user_pkt, *kernel_pkt; 5333 5334 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5335 kernel_pkt = parser->patched_cb->kernel_address + 5336 cb_patched_cur_length; 5337 5338 pkt_id = (enum packet_id) ( 5339 (le64_to_cpu(user_pkt->header) & 5340 PACKET_HEADER_PACKET_ID_MASK) >> 5341 PACKET_HEADER_PACKET_ID_SHIFT); 5342 5343 if (!validate_packet_id(pkt_id)) { 5344 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5345 rc = -EINVAL; 5346 break; 5347 } 5348 5349 pkt_size = gaudi_packet_sizes[pkt_id]; 5350 cb_parsed_length += pkt_size; 5351 if (cb_parsed_length > parser->user_cb_size) { 5352 dev_err(hdev->dev, 5353 "packet 0x%x is out of CB boundary\n", pkt_id); 5354 rc = -EINVAL; 5355 break; 5356 } 5357 5358 switch (pkt_id) { 5359 case PACKET_LIN_DMA: 5360 rc = gaudi_patch_dma_packet(hdev, parser, 5361 (struct packet_lin_dma *) user_pkt, 5362 (struct packet_lin_dma *) kernel_pkt, 5363 &new_pkt_size); 5364 cb_patched_cur_length += new_pkt_size; 5365 break; 5366 5367 case PACKET_MSG_PROT: 5368 dev_err(hdev->dev, 5369 "User not allowed to use MSG_PROT\n"); 5370 rc = -EPERM; 5371 break; 5372 5373 case PACKET_CP_DMA: 5374 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5375 rc = -EPERM; 5376 break; 5377 5378 case PACKET_STOP: 5379 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5380 rc = -EPERM; 5381 break; 5382 5383 case PACKET_WREG_32: 5384 case PACKET_WREG_BULK: 5385 case PACKET_MSG_LONG: 5386 case PACKET_MSG_SHORT: 5387 case PACKET_REPEAT: 5388 case PACKET_FENCE: 5389 case PACKET_NOP: 5390 case PACKET_ARB_POINT: 5391 case PACKET_LOAD_AND_EXE: 5392 memcpy(kernel_pkt, user_pkt, pkt_size); 5393 cb_patched_cur_length += pkt_size; 5394 break; 5395 5396 default: 5397 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5398 pkt_id); 5399 rc = -EINVAL; 5400 break; 5401 } 5402 5403 if (rc) 5404 break; 5405 } 5406 5407 return rc; 5408 } 5409 5410 static int gaudi_parse_cb_mmu(struct hl_device *hdev, 5411 struct hl_cs_parser *parser) 5412 { 5413 u64 handle; 5414 u32 patched_cb_size; 5415 struct hl_cb *user_cb; 5416 int rc; 5417 5418 /* 5419 * The new CB should have space at the end for two MSG_PROT packets: 5420 * 1. Optional NOP padding for cacheline alignment 5421 * 2. A packet that will act as a completion packet 5422 * 3. 
A packet that will generate MSI interrupt 5423 */ 5424 if (parser->completion) 5425 parser->patched_cb_size = parser->user_cb_size + 5426 gaudi_get_patched_cb_extra_size(parser->user_cb_size); 5427 else 5428 parser->patched_cb_size = parser->user_cb_size; 5429 5430 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5431 parser->patched_cb_size, false, false, 5432 &handle); 5433 5434 if (rc) { 5435 dev_err(hdev->dev, 5436 "Failed to allocate patched CB for DMA CS %d\n", 5437 rc); 5438 return rc; 5439 } 5440 5441 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5442 /* hl_cb_get should never fail */ 5443 if (!parser->patched_cb) { 5444 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5445 rc = -EFAULT; 5446 goto out; 5447 } 5448 5449 /* 5450 * We are protected from overflow because the check 5451 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk() 5452 * in the common code. That check is done only if is_kernel_allocated_cb is true. 5453 * 5454 * There is no option to reach here without going through that check because: 5455 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to 5456 * an external queue. 5457 * 2. For Gaudi, we only parse CBs that were submitted to the external queues. 5458 */ 5459 memcpy(parser->patched_cb->kernel_address, 5460 parser->user_cb->kernel_address, 5461 parser->user_cb_size); 5462 5463 patched_cb_size = parser->patched_cb_size; 5464 5465 /* Validate patched CB instead of user CB */ 5466 user_cb = parser->user_cb; 5467 parser->user_cb = parser->patched_cb; 5468 rc = gaudi_validate_cb(hdev, parser, true); 5469 parser->user_cb = user_cb; 5470 5471 if (rc) { 5472 hl_cb_put(parser->patched_cb); 5473 goto out; 5474 } 5475 5476 if (patched_cb_size != parser->patched_cb_size) { 5477 dev_err(hdev->dev, "user CB size mismatch\n"); 5478 hl_cb_put(parser->patched_cb); 5479 rc = -EINVAL; 5480 goto out; 5481 } 5482 5483 out: 5484 /* 5485 * Always call cb destroy here because we still have 1 reference 5486 * to it by calling cb_get earlier. After the job will be completed, 5487 * cb_put will release it, but here we want to remove it from the 5488 * idr 5489 */ 5490 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5491 5492 return rc; 5493 } 5494 5495 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 5496 struct hl_cs_parser *parser) 5497 { 5498 u64 handle; 5499 int rc; 5500 5501 rc = gaudi_validate_cb(hdev, parser, false); 5502 5503 if (rc) 5504 goto free_userptr; 5505 5506 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5507 parser->patched_cb_size, false, false, 5508 &handle); 5509 if (rc) { 5510 dev_err(hdev->dev, 5511 "Failed to allocate patched CB for DMA CS %d\n", rc); 5512 goto free_userptr; 5513 } 5514 5515 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5516 /* hl_cb_get should never fail here */ 5517 if (!parser->patched_cb) { 5518 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5519 rc = -EFAULT; 5520 goto out; 5521 } 5522 5523 rc = gaudi_patch_cb(hdev, parser); 5524 5525 if (rc) 5526 hl_cb_put(parser->patched_cb); 5527 5528 out: 5529 /* 5530 * Always call cb destroy here because we still have 1 reference 5531 * to it by calling cb_get earlier. 
After the job will be completed, 5532 * cb_put will release it, but here we want to remove it from the 5533 * idr 5534 */ 5535 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5536 5537 free_userptr: 5538 if (rc) 5539 hl_userptr_delete_list(hdev, parser->job_userptr_list); 5540 return rc; 5541 } 5542 5543 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 5544 struct hl_cs_parser *parser) 5545 { 5546 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5547 struct gaudi_device *gaudi = hdev->asic_specific; 5548 u32 nic_queue_offset, nic_mask_q_id; 5549 5550 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) && 5551 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) { 5552 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0; 5553 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2)); 5554 5555 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) { 5556 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5557 return -EINVAL; 5558 } 5559 } 5560 5561 /* For internal queue jobs just check if CB address is valid */ 5562 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5563 parser->user_cb_size, 5564 asic_prop->sram_user_base_address, 5565 asic_prop->sram_end_address)) 5566 return 0; 5567 5568 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5569 parser->user_cb_size, 5570 asic_prop->dram_user_base_address, 5571 asic_prop->dram_end_address)) 5572 return 0; 5573 5574 /* PMMU and HPMMU addresses are equal, check only one of them */ 5575 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5576 parser->user_cb_size, 5577 asic_prop->pmmu.start_addr, 5578 asic_prop->pmmu.end_addr)) 5579 return 0; 5580 5581 dev_err(hdev->dev, 5582 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 5583 parser->user_cb, parser->user_cb_size); 5584 5585 return -EFAULT; 5586 } 5587 5588 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5589 { 5590 struct gaudi_device *gaudi = hdev->asic_specific; 5591 5592 if (parser->queue_type == QUEUE_TYPE_INT) 5593 return gaudi_parse_cb_no_ext_queue(hdev, parser); 5594 5595 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 5596 return gaudi_parse_cb_mmu(hdev, parser); 5597 else 5598 return gaudi_parse_cb_no_mmu(hdev, parser); 5599 } 5600 5601 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, 5602 u32 len, u32 original_len, u64 cq_addr, u32 cq_val, 5603 u32 msi_vec, bool eb) 5604 { 5605 struct gaudi_device *gaudi = hdev->asic_specific; 5606 struct packet_msg_prot *cq_pkt; 5607 struct packet_nop *cq_padding; 5608 u64 msi_addr; 5609 u32 tmp; 5610 5611 cq_padding = kernel_address + original_len; 5612 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 5613 5614 while ((void *)cq_padding < (void *)cq_pkt) { 5615 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP)); 5616 cq_padding++; 5617 } 5618 5619 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5620 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5621 5622 if (eb) 5623 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5624 5625 cq_pkt->ctl = cpu_to_le32(tmp); 5626 cq_pkt->value = cpu_to_le32(cq_val); 5627 cq_pkt->addr = cpu_to_le64(cq_addr); 5628 5629 cq_pkt++; 5630 5631 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5632 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5633 cq_pkt->ctl = cpu_to_le32(tmp); 5634 cq_pkt->value = cpu_to_le32(1); 5635 5636 if (gaudi->multi_msi_mode) 5637 msi_addr = mmPCIE_MSI_INTR_0 
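/*
 * In multi-MSI mode each completion stream kicks its own 4-byte
 * interrupt register, indexed by msi_vec below; in the single-MSI case
 * (else branch) all completions go through mmPCIE_CORE_MSI_REQ.
 */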
+ msi_vec * 4; 5638 else 5639 msi_addr = mmPCIE_CORE_MSI_REQ; 5640 5641 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); 5642 } 5643 5644 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 5645 { 5646 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 5647 } 5648 5649 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 5650 u32 size, u64 val) 5651 { 5652 struct packet_lin_dma *lin_dma_pkt; 5653 struct hl_cs_job *job; 5654 u32 cb_size, ctl, err_cause; 5655 struct hl_cb *cb; 5656 int rc; 5657 5658 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 5659 if (!cb) 5660 return -EFAULT; 5661 5662 lin_dma_pkt = cb->kernel_address; 5663 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 5664 cb_size = sizeof(*lin_dma_pkt); 5665 5666 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 5667 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 5668 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 5669 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5670 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5671 5672 lin_dma_pkt->ctl = cpu_to_le32(ctl); 5673 lin_dma_pkt->src_addr = cpu_to_le64(val); 5674 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 5675 lin_dma_pkt->tsize = cpu_to_le32(size); 5676 5677 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5678 if (!job) { 5679 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5680 rc = -ENOMEM; 5681 goto release_cb; 5682 } 5683 5684 /* Verify DMA is OK */ 5685 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5686 if (err_cause && !hdev->init_done) { 5687 dev_dbg(hdev->dev, 5688 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5689 err_cause); 5690 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5691 } 5692 5693 job->id = 0; 5694 job->user_cb = cb; 5695 atomic_inc(&job->user_cb->cs_cnt); 5696 job->user_cb_size = cb_size; 5697 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5698 job->patched_cb = job->user_cb; 5699 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 5700 5701 hl_debugfs_add_job(hdev, job); 5702 5703 rc = gaudi_send_job_on_qman0(hdev, job); 5704 hl_debugfs_remove_job(hdev, job); 5705 kfree(job); 5706 atomic_dec(&cb->cs_cnt); 5707 5708 /* Verify DMA is OK */ 5709 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5710 if (err_cause) { 5711 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5712 rc = -EIO; 5713 if (!hdev->init_done) { 5714 dev_dbg(hdev->dev, 5715 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5716 err_cause); 5717 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5718 } 5719 } 5720 5721 release_cb: 5722 hl_cb_put(cb); 5723 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5724 5725 return rc; 5726 } 5727 5728 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 5729 u32 num_regs, u32 val) 5730 { 5731 struct packet_msg_long *pkt; 5732 struct hl_cs_job *job; 5733 u32 cb_size, ctl; 5734 struct hl_cb *cb; 5735 int i, rc; 5736 5737 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot); 5738 5739 if (cb_size > SZ_2M) { 5740 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M); 5741 return -ENOMEM; 5742 } 5743 5744 cb = hl_cb_kernel_create(hdev, cb_size, false); 5745 if (!cb) 5746 return -EFAULT; 5747 5748 pkt = cb->kernel_address; 5749 5750 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ 5751 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); 5752 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5753 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5754 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5755 5756 for (i = 0; i < num_regs ; 
i++, pkt++) { 5757 pkt->ctl = cpu_to_le32(ctl); 5758 pkt->value = cpu_to_le32(val); 5759 pkt->addr = cpu_to_le64(reg_base + (i * 4)); 5760 } 5761 5762 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5763 if (!job) { 5764 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5765 rc = -ENOMEM; 5766 goto release_cb; 5767 } 5768 5769 job->id = 0; 5770 job->user_cb = cb; 5771 atomic_inc(&job->user_cb->cs_cnt); 5772 job->user_cb_size = cb_size; 5773 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5774 job->patched_cb = job->user_cb; 5775 job->job_cb_size = cb_size; 5776 5777 hl_debugfs_add_job(hdev, job); 5778 5779 rc = gaudi_send_job_on_qman0(hdev, job); 5780 hl_debugfs_remove_job(hdev, job); 5781 kfree(job); 5782 atomic_dec(&cb->cs_cnt); 5783 5784 release_cb: 5785 hl_cb_put(cb); 5786 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5787 5788 return rc; 5789 } 5790 5791 static int gaudi_restore_sm_registers(struct hl_device *hdev) 5792 { 5793 u64 base_addr; 5794 u32 num_regs; 5795 int rc; 5796 5797 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5798 num_regs = NUM_OF_SOB_IN_BLOCK; 5799 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5800 if (rc) { 5801 dev_err(hdev->dev, "failed resetting SM registers"); 5802 return -ENOMEM; 5803 } 5804 5805 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0; 5806 num_regs = NUM_OF_SOB_IN_BLOCK; 5807 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5808 if (rc) { 5809 dev_err(hdev->dev, "failed resetting SM registers"); 5810 return -ENOMEM; 5811 } 5812 5813 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5814 num_regs = NUM_OF_SOB_IN_BLOCK; 5815 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5816 if (rc) { 5817 dev_err(hdev->dev, "failed resetting SM registers"); 5818 return -ENOMEM; 5819 } 5820 5821 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5822 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5823 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5824 if (rc) { 5825 dev_err(hdev->dev, "failed resetting SM registers"); 5826 return -ENOMEM; 5827 } 5828 5829 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0; 5830 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5831 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5832 if (rc) { 5833 dev_err(hdev->dev, "failed resetting SM registers"); 5834 return -ENOMEM; 5835 } 5836 5837 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5838 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5839 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5840 if (rc) { 5841 dev_err(hdev->dev, "failed resetting SM registers"); 5842 return -ENOMEM; 5843 } 5844 5845 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5846 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4); 5847 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT; 5848 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5849 if (rc) { 5850 dev_err(hdev->dev, "failed resetting SM registers"); 5851 return -ENOMEM; 5852 } 5853 5854 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + 5855 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4); 5856 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR; 5857 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5858 if (rc) { 5859 dev_err(hdev->dev, "failed resetting SM registers"); 5860 return -ENOMEM; 5861 } 5862 5863 return 0; 5864 } 5865 5866 static void gaudi_restore_dma_registers(struct hl_device *hdev) 5867 { 5868 u32 
sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 5869 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5870 int i; 5871 5872 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5873 u64 sob_addr = CFG_BASE + 5874 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5875 (i * sob_delta); 5876 u32 dma_offset = i * DMA_CORE_OFFSET; 5877 5878 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, 5879 lower_32_bits(sob_addr)); 5880 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 5881 upper_32_bits(sob_addr)); 5882 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 5883 5884 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 5885 * modified by the user for SRAM reduction 5886 */ 5887 if (i > 1) 5888 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 5889 0x00000001); 5890 } 5891 } 5892 5893 static void gaudi_restore_qm_registers(struct hl_device *hdev) 5894 { 5895 u32 qman_offset; 5896 int i; 5897 5898 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5899 qman_offset = i * DMA_QMAN_OFFSET; 5900 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 5901 } 5902 5903 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 5904 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 5905 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 5906 } 5907 5908 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 5909 qman_offset = i * TPC_QMAN_OFFSET; 5910 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 5911 } 5912 5913 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 5914 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET + 5915 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET; 5916 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0); 5917 } 5918 } 5919 5920 static int gaudi_restore_user_registers(struct hl_device *hdev) 5921 { 5922 int rc; 5923 5924 rc = gaudi_restore_sm_registers(hdev); 5925 if (rc) 5926 return rc; 5927 5928 gaudi_restore_dma_registers(hdev); 5929 gaudi_restore_qm_registers(hdev); 5930 5931 return 0; 5932 } 5933 5934 static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 5935 { 5936 return 0; 5937 } 5938 5939 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 5940 { 5941 u32 size = hdev->asic_prop.mmu_pgt_size + 5942 hdev->asic_prop.mmu_cache_mng_size; 5943 struct gaudi_device *gaudi = hdev->asic_specific; 5944 u64 addr = hdev->asic_prop.mmu_pgt_addr; 5945 5946 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 5947 return 0; 5948 5949 return gaudi_memset_device_memory(hdev, addr, size, 0); 5950 } 5951 5952 static void gaudi_restore_phase_topology(struct hl_device *hdev) 5953 { 5954 5955 } 5956 5957 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, 5958 u32 size_to_dma, dma_addr_t dma_addr) 5959 { 5960 u32 err_cause, val; 5961 u64 dma_offset; 5962 int rc; 5963 5964 dma_offset = dma_id * DMA_CORE_OFFSET; 5965 5966 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); 5967 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); 5968 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); 5969 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); 5970 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); 5971 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 5972 (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); 5973 5974 rc = hl_poll_timeout( 5975 hdev, 5976 mmDMA0_CORE_STS0 + dma_offset, 5977 val, 5978 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), 5979 0, 5980 1000000); 5981 5982 if (rc) { 5983 dev_err(hdev->dev, 5984 "DMA %d timed-out during reading of 0x%llx\n", 5985 dma_id, addr); 5986 return -EIO; 5987 } 5988 5989 /* Verify DMA is OK */ 5990 err_cause 
= RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5991 if (err_cause) { 5992 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5993 dev_dbg(hdev->dev, 5994 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5995 err_cause); 5996 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5997 5998 return -EIO; 5999 } 6000 6001 return 0; 6002 } 6003 6004 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, 6005 void *blob_addr) 6006 { 6007 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; 6008 u32 qm_glbl_sts0, qm_cgm_sts; 6009 u64 dma_offset, qm_offset; 6010 dma_addr_t dma_addr; 6011 void *kernel_addr; 6012 bool is_eng_idle; 6013 int rc = 0, dma_id; 6014 6015 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO); 6016 6017 if (!kernel_addr) 6018 return -ENOMEM; 6019 6020 hdev->asic_funcs->hw_queues_lock(hdev); 6021 6022 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 6023 dma_offset = dma_id * DMA_CORE_OFFSET; 6024 qm_offset = dma_id * DMA_QMAN_OFFSET; 6025 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 6026 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 6027 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 6028 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 6029 IS_DMA_IDLE(dma_core_sts0); 6030 6031 if (!is_eng_idle) { 6032 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 6033 dma_offset = dma_id * DMA_CORE_OFFSET; 6034 qm_offset = dma_id * DMA_QMAN_OFFSET; 6035 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 6036 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 6037 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 6038 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 6039 IS_DMA_IDLE(dma_core_sts0); 6040 6041 if (!is_eng_idle) { 6042 dev_err_ratelimited(hdev->dev, 6043 "Can't read via DMA because it is BUSY\n"); 6044 rc = -EAGAIN; 6045 goto out; 6046 } 6047 } 6048 6049 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); 6050 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, 6051 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 6052 6053 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6054 * using the compute ctx ASID, if exists. If not, use the kernel ctx 6055 * ASID 6056 */ 6057 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6058 6059 /* Verify DMA is OK */ 6060 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6061 if (err_cause) { 6062 dev_dbg(hdev->dev, 6063 "Clearing DMA0 engine from errors (cause 0x%x)\n", 6064 err_cause); 6065 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 6066 } 6067 6068 pos = 0; 6069 size_left = size; 6070 size_to_dma = SZ_2M; 6071 6072 while (size_left > 0) { 6073 6074 if (size_left < SZ_2M) 6075 size_to_dma = size_left; 6076 6077 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, 6078 dma_addr); 6079 if (rc) 6080 break; 6081 6082 memcpy(blob_addr + pos, kernel_addr, size_to_dma); 6083 6084 if (size_left <= SZ_2M) 6085 break; 6086 6087 pos += SZ_2M; 6088 addr += SZ_2M; 6089 size_left -= SZ_2M; 6090 } 6091 6092 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6093 * using the compute ctx ASID, if exists. 
If not, use the kernel ctx 6094 * ASID 6095 */ 6096 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 6097 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6098 6099 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); 6100 6101 out: 6102 hdev->asic_funcs->hw_queues_unlock(hdev); 6103 6104 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); 6105 6106 return rc; 6107 } 6108 6109 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 6110 { 6111 struct gaudi_device *gaudi = hdev->asic_specific; 6112 6113 if (hdev->reset_info.hard_reset_pending) 6114 return U64_MAX; 6115 6116 return readq(hdev->pcie_bar[HBM_BAR_ID] + 6117 (addr - gaudi->hbm_bar_cur_addr)); 6118 } 6119 6120 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 6121 { 6122 struct gaudi_device *gaudi = hdev->asic_specific; 6123 6124 if (hdev->reset_info.hard_reset_pending) 6125 return; 6126 6127 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 6128 (addr - gaudi->hbm_bar_cur_addr)); 6129 } 6130 6131 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 6132 { 6133 /* mask to zero the MMBP and ASID bits */ 6134 WREG32_AND(reg, ~0x7FF); 6135 WREG32_OR(reg, asid); 6136 } 6137 6138 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 6139 { 6140 struct gaudi_device *gaudi = hdev->asic_specific; 6141 6142 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6143 return; 6144 6145 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 6146 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6147 return; 6148 } 6149 6150 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6151 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6152 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6153 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6154 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6155 6156 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6157 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6158 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6159 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6160 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6161 6162 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6163 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6164 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6165 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6166 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6167 6168 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6169 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6170 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6171 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6172 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6173 6174 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6175 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6176 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6177 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6178 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6179 6180 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6181 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, 
asid); 6182 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6183 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6184 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6185 6186 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6187 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6188 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6189 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6190 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6191 6192 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6193 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6194 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6195 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6196 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6197 6198 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 6199 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 6200 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 6201 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 6202 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 6203 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 6204 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 6205 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 6206 6207 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6208 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6209 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6210 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6211 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6212 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 6213 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 6214 6215 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6216 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6217 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6218 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6219 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6220 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 6221 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 6222 6223 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6224 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6225 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6226 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6227 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6228 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 6229 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 6230 6231 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6232 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6233 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6234 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6235 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6236 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 6237 gaudi_mmu_prepare_reg(hdev, 
mmTPC3_CFG_AWUSER_LO, asid); 6238 6239 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6240 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6241 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6242 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6243 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6244 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 6245 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 6246 6247 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6248 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6249 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6250 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6251 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6252 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 6253 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 6254 6255 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6256 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6257 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6258 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6259 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6260 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 6261 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 6262 6263 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6264 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6265 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6266 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6267 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6268 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 6269 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 6270 6271 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6272 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6273 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6274 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6275 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6276 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6277 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6278 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6279 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6280 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6281 6282 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); 6283 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 6284 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 6285 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 6286 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 6287 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 6288 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 6289 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 6290 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 6291 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 6292 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 6293 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 6294 6295 if 
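/*
 * Unlike the DMA/TPC/MME blocks above, which always exist, the NIC
 * QMANs are given the ASID only when the corresponding HW_CAP_NICn bit
 * is set, i.e. only for NIC engines that were actually initialized.
 */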
(gaudi->hw_cap_initialized & HW_CAP_NIC0) { 6296 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, 6297 asid); 6298 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, 6299 asid); 6300 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, 6301 asid); 6302 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, 6303 asid); 6304 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, 6305 asid); 6306 } 6307 6308 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) { 6309 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, 6310 asid); 6311 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, 6312 asid); 6313 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, 6314 asid); 6315 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, 6316 asid); 6317 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, 6318 asid); 6319 } 6320 6321 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) { 6322 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, 6323 asid); 6324 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, 6325 asid); 6326 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, 6327 asid); 6328 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, 6329 asid); 6330 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, 6331 asid); 6332 } 6333 6334 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) { 6335 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, 6336 asid); 6337 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, 6338 asid); 6339 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, 6340 asid); 6341 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, 6342 asid); 6343 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, 6344 asid); 6345 } 6346 6347 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) { 6348 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, 6349 asid); 6350 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, 6351 asid); 6352 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, 6353 asid); 6354 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, 6355 asid); 6356 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, 6357 asid); 6358 } 6359 6360 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) { 6361 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, 6362 asid); 6363 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, 6364 asid); 6365 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, 6366 asid); 6367 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, 6368 asid); 6369 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, 6370 asid); 6371 } 6372 6373 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) { 6374 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, 6375 asid); 6376 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, 6377 asid); 6378 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, 6379 asid); 6380 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, 6381 asid); 6382 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, 6383 asid); 6384 } 6385 6386 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) { 6387 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, 6388 asid); 6389 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, 6390 asid); 6391 gaudi_mmu_prepare_reg(hdev, 
mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, 6392 asid); 6393 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, 6394 asid); 6395 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, 6396 asid); 6397 } 6398 6399 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) { 6400 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, 6401 asid); 6402 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, 6403 asid); 6404 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, 6405 asid); 6406 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, 6407 asid); 6408 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, 6409 asid); 6410 } 6411 6412 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) { 6413 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, 6414 asid); 6415 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, 6416 asid); 6417 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, 6418 asid); 6419 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, 6420 asid); 6421 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, 6422 asid); 6423 } 6424 6425 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 6426 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 6427 } 6428 6429 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 6430 struct hl_cs_job *job) 6431 { 6432 struct packet_msg_prot *fence_pkt; 6433 u32 *fence_ptr; 6434 dma_addr_t fence_dma_addr; 6435 struct hl_cb *cb; 6436 u32 tmp, timeout, dma_offset; 6437 int rc; 6438 6439 if (hdev->pldm) 6440 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 6441 else 6442 timeout = HL_DEVICE_TIMEOUT_USEC; 6443 6444 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 6445 if (!fence_ptr) { 6446 dev_err(hdev->dev, 6447 "Failed to allocate fence memory for QMAN0\n"); 6448 return -ENOMEM; 6449 } 6450 6451 cb = job->patched_cb; 6452 6453 fence_pkt = cb->kernel_address + 6454 job->job_cb_size - sizeof(struct packet_msg_prot); 6455 6456 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 6457 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 6458 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 6459 6460 fence_pkt->ctl = cpu_to_le32(tmp); 6461 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 6462 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 6463 6464 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 6465 6466 WREG32(mmDMA0_CORE_PROT + dma_offset, 6467 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6468 6469 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 6470 job->job_cb_size, cb->bus_address); 6471 if (rc) { 6472 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 6473 goto free_fence_ptr; 6474 } 6475 6476 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 6477 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 6478 timeout, true); 6479 6480 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 6481 6482 if (rc == -ETIMEDOUT) { 6483 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 6484 goto free_fence_ptr; 6485 } 6486 6487 free_fence_ptr: 6488 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); 6489 6490 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 6491 return rc; 6492 } 6493 6494 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 6495 { 6496 if (event_type >= GAUDI_EVENT_SIZE) 6497 goto event_not_supported; 6498 6499 if (!gaudi_irq_map_table[event_type].valid) 6500 goto 
event_not_supported; 6501 6502 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name); 6503 6504 return; 6505 6506 event_not_supported: 6507 snprintf(desc, size, "N/A"); 6508 } 6509 6510 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, 6511 bool is_write, u16 *engine_id_1, 6512 u16 *engine_id_2) 6513 { 6514 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 6515 6516 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 6517 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 6518 6519 switch (x_y) { 6520 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6521 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6522 dma_id[0] = 0; 6523 dma_id[1] = 2; 6524 break; 6525 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6526 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6527 dma_id[0] = 1; 6528 dma_id[1] = 3; 6529 break; 6530 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6531 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6532 dma_id[0] = 4; 6533 dma_id[1] = 6; 6534 break; 6535 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6536 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6537 dma_id[0] = 5; 6538 dma_id[1] = 7; 6539 break; 6540 default: 6541 goto unknown_initiator; 6542 } 6543 6544 for (i = 0 ; i < 2 ; i++) { 6545 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 6546 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6547 } 6548 6549 switch (x_y) { 6550 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6551 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6552 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6553 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6554 return "DMA0"; 6555 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6556 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; 6557 return "DMA2"; 6558 } else { 6559 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6560 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; 6561 return "DMA0 or DMA2"; 6562 } 6563 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6564 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6565 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6566 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6567 return "DMA1"; 6568 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6569 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; 6570 return "DMA3"; 6571 } else { 6572 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6573 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; 6574 return "DMA1 or DMA3"; 6575 } 6576 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6577 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6578 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6579 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6580 return "DMA4"; 6581 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6582 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; 6583 return "DMA6"; 6584 } else { 6585 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6586 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; 6587 return "DMA4 or DMA6"; 6588 } 6589 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6590 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6591 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6592 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6593 return "DMA5"; 6594 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6595 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; 6596 return "DMA7"; 6597 } else { 6598 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6599 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; 6600 return "DMA5 or DMA7"; 6601 } 6602 } 6603 6604 unknown_initiator: 6605 return "unknown initiator"; 6606 } 6607 6608 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, 6609 u16 *engine_id_1, u16 *engine_id_2) 6610 { 6611 u32 val, x_y, axi_id; 6612 6613 val = is_write ?
RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 6614 RREG32(mmMMU_UP_RAZWI_READ_ID); 6615 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 6616 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 6617 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 6618 RAZWI_INITIATOR_AXI_ID_SHIFT); 6619 6620 switch (x_y) { 6621 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 6622 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6623 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; 6624 return "TPC0"; 6625 } 6626 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6627 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; 6628 return "NIC0"; 6629 } 6630 break; 6631 case RAZWI_INITIATOR_ID_X_Y_TPC1: 6632 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; 6633 return "TPC1"; 6634 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 6635 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 6636 *engine_id_1 = GAUDI_ENGINE_ID_MME_0; 6637 return "MME0"; 6638 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 6639 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 6640 *engine_id_1 = GAUDI_ENGINE_ID_MME_1; 6641 return "MME1"; 6642 case RAZWI_INITIATOR_ID_X_Y_TPC2: 6643 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; 6644 return "TPC2"; 6645 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 6646 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6647 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; 6648 return "TPC3"; 6649 } 6650 /* PCI, CPU or PSOC does not have engine id*/ 6651 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 6652 return "PCI"; 6653 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 6654 return "CPU"; 6655 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 6656 return "PSOC"; 6657 break; 6658 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6659 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6660 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6661 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6662 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6663 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6664 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6665 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6666 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, 6667 engine_id_1, engine_id_2); 6668 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 6669 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6670 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; 6671 return "TPC4"; 6672 } 6673 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6674 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; 6675 return "NIC1"; 6676 } 6677 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6678 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; 6679 return "NIC2"; 6680 } 6681 break; 6682 case RAZWI_INITIATOR_ID_X_Y_TPC5: 6683 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; 6684 return "TPC5"; 6685 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 6686 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 6687 *engine_id_1 = GAUDI_ENGINE_ID_MME_2; 6688 return "MME2"; 6689 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 6690 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 6691 *engine_id_1 = GAUDI_ENGINE_ID_MME_3; 6692 return "MME3"; 6693 case RAZWI_INITIATOR_ID_X_Y_TPC6: 6694 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; 6695 return "TPC6"; 6696 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 6697 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6698 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; 6699 return "TPC7"; 6700 } 6701 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6702 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; 6703 return "NIC4"; 6704 } 6705 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6706 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; 6707 return "NIC5"; 6708 } 6709 break; 6710 default: 6711 break; 6712 } 6713 6714 dev_err(hdev->dev, 6715 
"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 6716 val, 6717 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 6718 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 6719 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 6720 RAZWI_INITIATOR_AXI_ID_MASK); 6721 6722 return "unknown initiator"; 6723 } 6724 6725 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1, 6726 u16 *engine_id_2, bool *is_read, bool *is_write) 6727 { 6728 6729 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 6730 dev_err_ratelimited(hdev->dev, 6731 "RAZWI event caused by illegal write of %s\n", 6732 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); 6733 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 6734 *is_write = true; 6735 } 6736 6737 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 6738 dev_err_ratelimited(hdev->dev, 6739 "RAZWI event caused by illegal read of %s\n", 6740 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); 6741 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 6742 *is_read = true; 6743 } 6744 } 6745 6746 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) 6747 { 6748 struct gaudi_device *gaudi = hdev->asic_specific; 6749 u32 val; 6750 6751 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6752 return; 6753 6754 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 6755 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6756 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 6757 *addr <<= 32; 6758 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 6759 6760 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); 6761 hl_handle_page_fault(hdev, *addr, 0, true, event_mask); 6762 6763 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 6764 } 6765 6766 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 6767 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6768 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 6769 *addr <<= 32; 6770 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 6771 6772 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); 6773 6774 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 6775 } 6776 } 6777 6778 /* 6779 * +-------------------+------------------------------------------------------+ 6780 * | Configuration Reg | Description | 6781 * | Address | | 6782 * +-------------------+------------------------------------------------------+ 6783 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| 6784 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 6785 * | |0xF34 memory wrappers 63:32 | 6786 * | |0xF38 memory wrappers 95:64 | 6787 * | |0xF3C memory wrappers 127:96 | 6788 * +-------------------+------------------------------------------------------+ 6789 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 6790 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 6791 * | |0xF44 memory wrappers 63:32 | 6792 * | |0xF48 memory wrappers 95:64 | 6793 * | |0xF4C memory wrappers 127:96 | 6794 * +-------------------+------------------------------------------------------+ 6795 */ 6796 static int gaudi_extract_ecc_info(struct hl_device *hdev, 6797 struct ecc_info_extract_params *params, u64 *ecc_address, 6798 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 6799 { 6800 u32 i, num_mem_regs, reg, err_bit; 6801 u64 err_addr, err_word = 0; 6802 6803 num_mem_regs = params->num_memories / 32 + 6804 ((params->num_memories % 32) ? 
1 : 0); 6805 6806 if (params->block_address >= CFG_BASE) 6807 params->block_address -= CFG_BASE; 6808 6809 if (params->derr) 6810 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 6811 else 6812 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 6813 6814 /* Set invalid wrapper index */ 6815 *memory_wrapper_idx = 0xFF; 6816 6817 /* Iterate through memory wrappers, a single bit must be set */ 6818 for (i = 0 ; i < num_mem_regs ; i++) { 6819 err_addr += i * 4; 6820 err_word = RREG32(err_addr); 6821 if (err_word) { 6822 err_bit = __ffs(err_word); 6823 *memory_wrapper_idx = err_bit + (32 * i); 6824 break; 6825 } 6826 } 6827 6828 if (*memory_wrapper_idx == 0xFF) { 6829 dev_err(hdev->dev, "ECC error information cannot be found\n"); 6830 return -EINVAL; 6831 } 6832 6833 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 6834 *memory_wrapper_idx); 6835 6836 *ecc_address = 6837 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 6838 *ecc_syndrom = 6839 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 6840 6841 /* Clear error indication */ 6842 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 6843 if (params->derr) 6844 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 6845 else 6846 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 6847 6848 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 6849 6850 return 0; 6851 } 6852 6853 /* 6854 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6855 * 6856 * @idx: the current pi/ci value 6857 * @q_len: the queue length (power of 2) 6858 * 6859 * @return the cyclically decremented index 6860 */ 6861 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) 6862 { 6863 u32 mask = q_len - 1; 6864 6865 /* 6866 * modular decrement is equivalent to adding (queue_size -1) 6867 * later we take LSBs to make sure the value is in the 6868 * range [0, queue_len - 1] 6869 */ 6870 return (idx + q_len - 1) & mask; 6871 } 6872 6873 /** 6874 * gaudi_handle_sw_config_stream_data - print SW config stream data 6875 * 6876 * @hdev: pointer to the habanalabs device structure 6877 * @stream: the QMAN's stream 6878 * @qman_base: base address of QMAN registers block 6879 * @event_mask: mask of the last events occurred 6880 */ 6881 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream, 6882 u64 qman_base, u64 event_mask) 6883 { 6884 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6885 u32 cq_ptr_lo_off, size; 6886 6887 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0; 6888 6889 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 6890 stream * cq_ptr_lo_off; 6891 cq_ptr_hi = cq_ptr_lo + 6892 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0); 6893 cq_tsize = cq_ptr_lo + 6894 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0); 6895 6896 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6897 size = RREG32(cq_tsize); 6898 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", 6899 stream, cq_ptr, size); 6900 6901 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6902 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; 6903 hdev->captured_err_info.undef_opcode.cq_size = size; 6904 hdev->captured_err_info.undef_opcode.stream_id = stream; 6905 } 6906 } 6907 6908 /** 6909 * gaudi_handle_last_pqes_on_err - print last PQEs on error 6910 * 6911 * @hdev: pointer to the habanalabs device structure 6912 * @qid_base: first QID of the QMAN (out of 4 streams) 6913 * @stream: the QMAN's stream 6914 * @qman_base: 
base address of QMAN registers block 6915 * @event_mask: mask of the last events occurred 6916 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 6917 */ 6918 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, 6919 u32 stream, u64 qman_base, 6920 u64 event_mask, 6921 bool pr_sw_conf) 6922 { 6923 u32 ci, qm_ci_stream_off, queue_len; 6924 struct hl_hw_queue *q; 6925 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE]; 6926 int i; 6927 6928 q = &hdev->kernel_queues[qid_base + stream]; 6929 6930 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0; 6931 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) + 6932 stream * qm_ci_stream_off; 6933 6934 queue_len = (q->queue_type == QUEUE_TYPE_INT) ? 6935 q->int_queue_len : HL_QUEUE_LENGTH; 6936 6937 hdev->asic_funcs->hw_queues_lock(hdev); 6938 6939 if (pr_sw_conf) 6940 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6941 6942 ci = RREG32(pq_ci); 6943 6944 /* we should start printing from ci - 1 */ 6945 ci = gaudi_queue_idx_dec(ci, queue_len); 6946 memset(addr, 0, sizeof(addr)); 6947 6948 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6949 struct hl_bd *bd; 6950 u32 len; 6951 6952 bd = q->kernel_address; 6953 bd += ci; 6954 6955 len = le32_to_cpu(bd->len); 6956 /* len 0 means an uninitialized entry - break */ 6957 if (!len) 6958 break; 6959 6960 addr[i] = le64_to_cpu(bd->ptr); 6961 6962 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", 6963 stream, ci, addr[i], len); 6964 6965 /* get previous ci, wrap if needed */ 6966 ci = gaudi_queue_idx_dec(ci, queue_len); 6967 } 6968 6969 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6970 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; 6971 u32 arr_idx = undef_opcode->cb_addr_streams_len; 6972 6973 if (arr_idx == 0) { 6974 undef_opcode->timestamp = ktime_get(); 6975 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base]; 6976 } 6977 6978 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr)); 6979 undef_opcode->cb_addr_streams_len++; 6980 } 6981 6982 hdev->asic_funcs->hw_queues_unlock(hdev); 6983 } 6984 6985 /** 6986 * handle_qman_data_on_err - extract QMAN data on error 6987 * 6988 * @hdev: pointer to the habanalabs device structure 6989 * @qid_base: first QID of the QMAN (out of 4 streams) 6990 * @stream: the QMAN's stream 6991 * @qman_base: base address of QMAN registers block 6992 * @event_mask: mask of the last events occurred 6993 * 6994 * This function attempts to extract as much data as possible on a QMAN error. 6995 * On upper CP print the SW config stream data and last 8 PQEs.
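 * (Stream values 0-3 denote the four upper CPs; the value QMAN_STREAMS
 * denotes the lower CP.)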
6996 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 6997 */ 6998 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base, 6999 u32 stream, u64 qman_base, u64 event_mask) 7000 { 7001 u32 i; 7002 7003 if (stream != QMAN_STREAMS) { 7004 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, 7005 qman_base, event_mask, true); 7006 return; 7007 } 7008 7009 /* handle Lower-CP */ 7010 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 7011 7012 for (i = 0; i < QMAN_STREAMS; i++) 7013 gaudi_handle_last_pqes_on_err(hdev, qid_base, i, 7014 qman_base, event_mask, false); 7015 } 7016 7017 static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 7018 const char *qm_name, 7019 u64 qman_base, 7020 u32 qid_base, 7021 u64 *event_mask) 7022 { 7023 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 7024 u64 glbl_sts_addr, arb_err_addr; 7025 char reg_desc[32]; 7026 7027 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE); 7028 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE); 7029 7030 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 7031 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 7032 glbl_sts_clr_val = 0; 7033 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 7034 7035 if (!glbl_sts_val) 7036 continue; 7037 7038 if (i == QMAN_STREAMS) 7039 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 7040 else 7041 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 7042 7043 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 7044 if (glbl_sts_val & BIT(j)) { 7045 dev_err_ratelimited(hdev->dev, 7046 "%s %s. err cause: %s\n", 7047 qm_name, reg_desc, 7048 gaudi_qman_error_cause[j]); 7049 glbl_sts_clr_val |= BIT(j); 7050 } 7051 } 7052 /* check for undefined opcode */ 7053 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && 7054 hdev->captured_err_info.undef_opcode.write_enable) { 7055 memset(&hdev->captured_err_info.undef_opcode, 0, 7056 sizeof(hdev->captured_err_info.undef_opcode)); 7057 7058 hdev->captured_err_info.undef_opcode.write_enable = false; 7059 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; 7060 } 7061 7062 /* Write 1 clear errors */ 7063 if (!hdev->stop_on_err) 7064 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 7065 else 7066 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask); 7067 } 7068 7069 arb_err_val = RREG32(arb_err_addr); 7070 7071 if (!arb_err_val) 7072 return; 7073 7074 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 7075 if (arb_err_val & BIT(j)) { 7076 dev_err_ratelimited(hdev->dev, 7077 "%s ARB_ERR. 
err cause: %s\n", 7078 qm_name, 7079 gaudi_qman_arb_error_cause[j]); 7080 } 7081 } 7082 } 7083 7084 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, 7085 struct hl_eq_sm_sei_data *sei_data) 7086 { 7087 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; 7088 7089 /* Flip the bits as the enum is ordered in the opposite way */ 7090 index = (index ^ 0x3) & 0x3; 7091 7092 switch (sei_data->sei_cause) { 7093 case SM_SEI_SO_OVERFLOW: 7094 dev_err_ratelimited(hdev->dev, 7095 "%s SEI Error: SOB Group %u overflow/underflow", 7096 gaudi_sync_manager_names[index], 7097 le32_to_cpu(sei_data->sei_log)); 7098 break; 7099 case SM_SEI_LBW_4B_UNALIGNED: 7100 dev_err_ratelimited(hdev->dev, 7101 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", 7102 gaudi_sync_manager_names[index], 7103 le32_to_cpu(sei_data->sei_log)); 7104 break; 7105 case SM_SEI_AXI_RESPONSE_ERR: 7106 dev_err_ratelimited(hdev->dev, 7107 "%s SEI Error: AXI ID %u response error", 7108 gaudi_sync_manager_names[index], 7109 le32_to_cpu(sei_data->sei_log)); 7110 break; 7111 default: 7112 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", 7113 le32_to_cpu(sei_data->sei_log)); 7114 break; 7115 } 7116 } 7117 7118 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7119 struct hl_eq_ecc_data *ecc_data) 7120 { 7121 struct ecc_info_extract_params params; 7122 u64 ecc_address = 0, ecc_syndrom = 0; 7123 u8 index, memory_wrapper_idx = 0; 7124 bool extract_info_from_fw; 7125 int rc; 7126 7127 if (hdev->asic_prop.fw_security_enabled) { 7128 extract_info_from_fw = true; 7129 goto extract_ecc_info; 7130 } 7131 7132 switch (event_type) { 7133 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 7134 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: 7135 extract_info_from_fw = true; 7136 break; 7137 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7138 index = event_type - GAUDI_EVENT_TPC0_SERR; 7139 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7140 params.num_memories = 90; 7141 params.derr = false; 7142 extract_info_from_fw = false; 7143 break; 7144 case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: 7145 index = event_type - GAUDI_EVENT_TPC0_DERR; 7146 params.block_address = 7147 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7148 params.num_memories = 90; 7149 params.derr = true; 7150 extract_info_from_fw = false; 7151 break; 7152 case GAUDI_EVENT_MME0_ACC_SERR: 7153 case GAUDI_EVENT_MME1_ACC_SERR: 7154 case GAUDI_EVENT_MME2_ACC_SERR: 7155 case GAUDI_EVENT_MME3_ACC_SERR: 7156 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; 7157 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7158 params.num_memories = 128; 7159 params.derr = false; 7160 extract_info_from_fw = false; 7161 break; 7162 case GAUDI_EVENT_MME0_ACC_DERR: 7163 case GAUDI_EVENT_MME1_ACC_DERR: 7164 case GAUDI_EVENT_MME2_ACC_DERR: 7165 case GAUDI_EVENT_MME3_ACC_DERR: 7166 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; 7167 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7168 params.num_memories = 128; 7169 params.derr = true; 7170 extract_info_from_fw = false; 7171 break; 7172 case GAUDI_EVENT_MME0_SBAB_SERR: 7173 case GAUDI_EVENT_MME1_SBAB_SERR: 7174 case GAUDI_EVENT_MME2_SBAB_SERR: 7175 case GAUDI_EVENT_MME3_SBAB_SERR: 7176 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; 7177 params.block_address = 7178 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7179 params.num_memories = 33; 7180 params.derr = false; 7181 extract_info_from_fw = false; 7182 break; 7183 case GAUDI_EVENT_MME0_SBAB_DERR: 7184 case GAUDI_EVENT_MME1_SBAB_DERR: 7185 case GAUDI_EVENT_MME2_SBAB_DERR: 7186 case GAUDI_EVENT_MME3_SBAB_DERR: 7187 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; 7188 params.block_address = 7189 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7190 params.num_memories = 33; 7191 params.derr = true; 7192 extract_info_from_fw = false; 7193 break; 7194 default: 7195 return; 7196 } 7197 7198 extract_ecc_info: 7199 if (extract_info_from_fw) { 7200 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7201 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7202 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7203 } else { 7204 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address, 7205 &ecc_syndrom, &memory_wrapper_idx); 7206 if (rc) 7207 return; 7208 } 7209 7210 dev_err(hdev->dev, 7211 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n", 7212 ecc_address, ecc_syndrom, memory_wrapper_idx); 7213 } 7214 7215 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7216 { 7217 u64 qman_base; 7218 char desc[32]; 7219 u32 qid_base; 7220 u8 index; 7221 7222 switch (event_type) { 7223 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7224 index = event_type - GAUDI_EVENT_TPC0_QM; 7225 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7226 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; 7227 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); 7228 break; 7229 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7230 if (event_type == GAUDI_EVENT_MME0_QM) { 7231 index = 0; 7232 qid_base = GAUDI_QUEUE_ID_MME_0_0; 7233 } else { /* event_type == GAUDI_EVENT_MME2_QM */ 7234 index = 2; 7235 qid_base = GAUDI_QUEUE_ID_MME_1_0; 7236 } 7237 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET; 7238 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); 7239 break; 7240 case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: 7241 index = event_type - GAUDI_EVENT_DMA0_QM; 7242 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS; 7243 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */ 7244 if (index > 1) 7245 qid_base++; 7246 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET; 7247 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); 7248 break; 7249 case GAUDI_EVENT_NIC0_QM0: 7250 qid_base = GAUDI_QUEUE_ID_NIC_0_0; 7251 qman_base = mmNIC0_QM0_BASE; 7252 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0"); 7253 break; 7254 case GAUDI_EVENT_NIC0_QM1: 7255 qid_base = GAUDI_QUEUE_ID_NIC_1_0; 7256 qman_base = mmNIC0_QM1_BASE; 7257 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1"); 7258 break; 7259 case GAUDI_EVENT_NIC1_QM0: 7260 qid_base = GAUDI_QUEUE_ID_NIC_2_0; 7261 qman_base = mmNIC1_QM0_BASE; 7262 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0"); 7263 break; 7264 case GAUDI_EVENT_NIC1_QM1: 7265 qid_base = GAUDI_QUEUE_ID_NIC_3_0; 7266 qman_base = mmNIC1_QM1_BASE; 7267 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1"); 7268 break; 7269 case GAUDI_EVENT_NIC2_QM0: 7270 qid_base = GAUDI_QUEUE_ID_NIC_4_0; 7271 qman_base = mmNIC2_QM0_BASE; 7272 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0"); 7273 break; 7274 case GAUDI_EVENT_NIC2_QM1: 7275 qid_base = GAUDI_QUEUE_ID_NIC_5_0; 7276 qman_base = mmNIC2_QM1_BASE; 7277 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1"); 7278 break; 7279 case GAUDI_EVENT_NIC3_QM0: 7280 qid_base = GAUDI_QUEUE_ID_NIC_6_0; 7281 qman_base = mmNIC3_QM0_BASE; 7282 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0"); 7283 break; 7284 case GAUDI_EVENT_NIC3_QM1: 7285 qid_base = GAUDI_QUEUE_ID_NIC_7_0; 7286 qman_base = mmNIC3_QM1_BASE; 7287 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1"); 7288 break; 7289 case GAUDI_EVENT_NIC4_QM0: 7290 qid_base = GAUDI_QUEUE_ID_NIC_8_0; 7291 qman_base = mmNIC4_QM0_BASE; 7292 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0"); 7293 break; 7294 case GAUDI_EVENT_NIC4_QM1: 7295 qid_base = GAUDI_QUEUE_ID_NIC_9_0; 7296 qman_base = mmNIC4_QM1_BASE; 7297 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1"); 7298 break; 7299 default: 7300 return; 7301 } 7302 7303 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask); 7304 } 7305 7306 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, 7307 bool check_razwi, u64 *event_mask) 7308 { 7309 bool is_read = false, is_write = false; 7310 u16 engine_id[2], num_of_razwi_eng = 0; 7311 char desc[64] = ""; 7312 u64 razwi_addr = 0; 7313 u8 razwi_flags = 0; 7314 7315 /* 7316 * Init engine id by default as not valid and only if razwi initiated from engine with 7317 * engine id it will get valid value. 
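 * gaudi_print_and_get_razwi_info() below fills engine_id[0]/[1] only when the
 * initiating engine(s) of the RAZWI can be identified; entries left as
 * HL_RAZWI_NA_ENG_ID are not counted in num_of_razwi_eng.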
7318 */ 7319 engine_id[0] = HL_RAZWI_NA_ENG_ID; 7320 engine_id[1] = HL_RAZWI_NA_ENG_ID; 7321 7322 gaudi_get_event_desc(event_type, desc, sizeof(desc)); 7323 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7324 event_type, desc); 7325 7326 if (check_razwi) { 7327 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, 7328 &is_write); 7329 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); 7330 7331 if (is_read) 7332 razwi_flags |= HL_RAZWI_READ; 7333 if (is_write) 7334 razwi_flags |= HL_RAZWI_WRITE; 7335 7336 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) { 7337 if (engine_id[1] != HL_RAZWI_NA_ENG_ID) 7338 num_of_razwi_eng = 2; 7339 else 7340 num_of_razwi_eng = 1; 7341 } 7342 7343 if (razwi_flags) 7344 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, 7345 razwi_flags, event_mask); 7346 } 7347 } 7348 7349 static void gaudi_print_out_of_sync_info(struct hl_device *hdev, 7350 struct cpucp_pkt_sync_err *sync_err) 7351 { 7352 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 7353 7354 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 7355 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 7356 } 7357 7358 static void gaudi_print_fw_alive_info(struct hl_device *hdev, 7359 struct hl_eq_fw_alive *fw_alive) 7360 { 7361 dev_err(hdev->dev, 7362 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", 7363 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical", 7364 le32_to_cpu(fw_alive->process_id), 7365 le32_to_cpu(fw_alive->thread_id), 7366 le64_to_cpu(fw_alive->uptime_seconds)); 7367 } 7368 7369 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, 7370 void *data) 7371 { 7372 char desc[64] = "", *type; 7373 struct eq_nic_sei_event *eq_nic_sei = data; 7374 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; 7375 7376 switch (eq_nic_sei->axi_error_cause) { 7377 case RXB: 7378 type = "RXB"; 7379 break; 7380 case RXE: 7381 type = "RXE"; 7382 break; 7383 case TXS: 7384 type = "TXS"; 7385 break; 7386 case TXE: 7387 type = "TXE"; 7388 break; 7389 case QPC_RESP: 7390 type = "QPC_RESP"; 7391 break; 7392 case NON_AXI_ERR: 7393 type = "NON_AXI_ERR"; 7394 break; 7395 case TMR: 7396 type = "TMR"; 7397 break; 7398 default: 7399 dev_err(hdev->dev, "unknown NIC AXI cause %d\n", 7400 eq_nic_sei->axi_error_cause); 7401 type = "N/A"; 7402 break; 7403 } 7404 7405 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, 7406 eq_nic_sei->id); 7407 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7408 event_type, desc); 7409 } 7410 7411 static int gaudi_compute_reset_late_init(struct hl_device *hdev) 7412 { 7413 /* GAUDI doesn't support any reset except hard-reset */ 7414 return -EPERM; 7415 } 7416 7417 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, 7418 struct hl_eq_hbm_ecc_data *hbm_ecc_data) 7419 { 7420 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7421 int rc = 0; 7422 7423 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 7424 CPU_BOOT_DEV_STS0_HBM_ECC_EN) { 7425 if (!hbm_ecc_data) { 7426 dev_err(hdev->dev, "No FW ECC data"); 7427 return 0; 7428 } 7429 7430 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK, 7431 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7432 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK, 7433 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7434 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK, 7435 
le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7436 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK, 7437 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7438 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK, 7439 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7440 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK, 7441 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7442 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK, 7443 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7444 7445 dev_err(hdev->dev, 7446 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7447 device, ch, wr_par, rd_par, ca_par, serr, derr); 7448 dev_err(hdev->dev, 7449 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7450 device, ch, hbm_ecc_data->first_addr, type, 7451 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7452 hbm_ecc_data->dec_cnt); 7453 return 0; 7454 } 7455 7456 if (hdev->asic_prop.fw_security_enabled) { 7457 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7458 return 0; 7459 } 7460 7461 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; 7462 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { 7463 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); 7464 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7465 if (val) { 7466 rc = -EIO; 7467 dev_err(hdev->dev, 7468 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7469 device, ch * 2, val & 0x1, (val >> 1) & 0x1, 7470 (val >> 2) & 0x1, (val >> 3) & 0x1, 7471 (val >> 4) & 0x1); 7472 7473 val2 = RREG32(base + ch * 0x1000 + 0x060); 7474 dev_err(hdev->dev, 7475 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7476 device, ch * 2, 7477 RREG32(base + ch * 0x1000 + 0x064), 7478 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7479 (val2 & 0xFF0000) >> 16, 7480 (val2 & 0xFF000000) >> 24); 7481 } 7482 7483 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); 7484 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7485 if (val) { 7486 rc = -EIO; 7487 dev_err(hdev->dev, 7488 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7489 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, 7490 (val >> 2) & 0x1, (val >> 3) & 0x1, 7491 (val >> 4) & 0x1); 7492 7493 val2 = RREG32(base + ch * 0x1000 + 0x070); 7494 dev_err(hdev->dev, 7495 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7496 device, ch * 2 + 1, 7497 RREG32(base + ch * 0x1000 + 0x074), 7498 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7499 (val2 & 0xFF0000) >> 16, 7500 (val2 & 0xFF000000) >> 24); 7501 } 7502 7503 /* Clear interrupts */ 7504 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); 7505 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); 7506 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); 7507 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); 7508 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); 7509 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); 7510 } 7511 7512 val = RREG32(base + 0x8F30); 7513 val2 = RREG32(base + 0x8F34); 7514 if (val | val2) { 7515 rc = -EIO; 7516 dev_err(hdev->dev, 7517 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", 7518 device, val, val2); 7519 } 7520 val = RREG32(base + 0x8F40); 7521 val2 = RREG32(base + 0x8F44); 7522 if (val | val2) { 7523 rc = -EIO; 7524 dev_err(hdev->dev, 7525 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", 7526 device, val, val2); 7527 } 7528 7529 return rc; 7530 } 
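
/*
 * gaudi_hbm_event_to_dev - map an HBM SPI event to its HBM device index.
 * Each of the four HBM devices reports two SPI events, both resolving to
 * the same device.
 */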
static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}

static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
		char *interrupt_name)
{
	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
	bool soft_reset_required = false;

	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (tpc_interrupts_cause & BIT(i)) {
			dev_err_ratelimited(hdev->dev,
					"TPC%d_%s interrupt cause: %s\n",
					tpc_id, interrupt_name,
					gaudi_tpc_interrupts_cause[i]);
			/* If this is QM error, we need to soft-reset */
			if (i == 15)
				soft_reset_required = true;
		}

	/* Clear interrupts */
	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

	return soft_reset_required;
}

static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}

static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}

static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
dev_err(hdev->dev, "Received invalid clock change event %d\n", 7634 event_type); 7635 break; 7636 } 7637 7638 mutex_unlock(&hdev->clk_throttling.lock); 7639 } 7640 7641 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 7642 { 7643 struct gaudi_device *gaudi = hdev->asic_specific; 7644 struct hl_info_fw_err_info fw_err_info; 7645 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0; 7646 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); 7647 u32 fw_fatal_err_flag = 0, flags = 0; 7648 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) 7649 >> EQ_CTL_EVENT_TYPE_SHIFT); 7650 bool reset_required, reset_direct = false; 7651 u8 cause; 7652 int rc; 7653 7654 if (event_type >= GAUDI_EVENT_SIZE) { 7655 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 7656 event_type, GAUDI_EVENT_SIZE - 1); 7657 return; 7658 } 7659 7660 gaudi->events_stat[event_type]++; 7661 gaudi->events_stat_aggregate[event_type]++; 7662 7663 switch (event_type) { 7664 case GAUDI_EVENT_PCIE_CORE_DERR: 7665 case GAUDI_EVENT_PCIE_IF_DERR: 7666 case GAUDI_EVENT_PCIE_PHY_DERR: 7667 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: 7668 case GAUDI_EVENT_MME0_ACC_DERR: 7669 case GAUDI_EVENT_MME0_SBAB_DERR: 7670 case GAUDI_EVENT_MME1_ACC_DERR: 7671 case GAUDI_EVENT_MME1_SBAB_DERR: 7672 case GAUDI_EVENT_MME2_ACC_DERR: 7673 case GAUDI_EVENT_MME2_SBAB_DERR: 7674 case GAUDI_EVENT_MME3_ACC_DERR: 7675 case GAUDI_EVENT_MME3_SBAB_DERR: 7676 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: 7677 fallthrough; 7678 case GAUDI_EVENT_CPU_IF_ECC_DERR: 7679 case GAUDI_EVENT_PSOC_MEM_DERR: 7680 case GAUDI_EVENT_PSOC_CORESIGHT_DERR: 7681 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: 7682 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: 7683 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: 7684 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: 7685 case GAUDI_EVENT_MMU_DERR: 7686 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7687 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7688 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7689 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7690 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7691 goto reset_device; 7692 7693 case GAUDI_EVENT_GIC500: 7694 case GAUDI_EVENT_AXI_ECC: 7695 case GAUDI_EVENT_L2_RAM_ECC: 7696 case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: 7697 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7698 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7699 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7700 goto reset_device; 7701 7702 case GAUDI_EVENT_HBM0_SPI_0: 7703 case GAUDI_EVENT_HBM1_SPI_0: 7704 case GAUDI_EVENT_HBM2_SPI_0: 7705 case GAUDI_EVENT_HBM3_SPI_0: 7706 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7707 gaudi_hbm_read_interrupts(hdev, 7708 gaudi_hbm_event_to_dev(event_type), 7709 &eq_entry->hbm_ecc_data); 7710 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7711 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7712 goto reset_device; 7713 7714 case GAUDI_EVENT_HBM0_SPI_1: 7715 case GAUDI_EVENT_HBM1_SPI_1: 7716 case GAUDI_EVENT_HBM2_SPI_1: 7717 case GAUDI_EVENT_HBM3_SPI_1: 7718 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7719 gaudi_hbm_read_interrupts(hdev, 7720 gaudi_hbm_event_to_dev(event_type), 7721 &eq_entry->hbm_ecc_data); 7722 hl_fw_unmask_irq(hdev, event_type); 7723 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7724 break; 7725 7726 case GAUDI_EVENT_TPC0_DEC: 7727 case GAUDI_EVENT_TPC1_DEC: 7728 case GAUDI_EVENT_TPC2_DEC: 7729 case GAUDI_EVENT_TPC3_DEC: 7730 case GAUDI_EVENT_TPC4_DEC: 7731 case GAUDI_EVENT_TPC5_DEC: 7732 case GAUDI_EVENT_TPC6_DEC: 7733 case GAUDI_EVENT_TPC7_DEC: 7734 /* In TPC DEC event, notify on TPC assertion. While there isn't 7735 * a specific event for assertion yet, the FW generates TPC DEC event. 7736 * The SW upper layer will inspect an internal mapped area to indicate 7737 * if the event is a TPC Assertion or a "real" TPC DEC. 7738 */ 7739 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7740 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7741 reset_required = gaudi_tpc_read_interrupts(hdev, 7742 tpc_dec_event_to_tpc_id(event_type), 7743 "AXI_SLV_DEC_Error"); 7744 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7745 if (reset_required) { 7746 dev_err(hdev->dev, "reset required due to %s\n", 7747 gaudi_irq_map_table[event_type].name); 7748 7749 reset_direct = true; 7750 goto reset_device; 7751 } else { 7752 hl_fw_unmask_irq(hdev, event_type); 7753 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7754 } 7755 break; 7756 7757 case GAUDI_EVENT_TPC0_KRN_ERR: 7758 case GAUDI_EVENT_TPC1_KRN_ERR: 7759 case GAUDI_EVENT_TPC2_KRN_ERR: 7760 case GAUDI_EVENT_TPC3_KRN_ERR: 7761 case GAUDI_EVENT_TPC4_KRN_ERR: 7762 case GAUDI_EVENT_TPC5_KRN_ERR: 7763 case GAUDI_EVENT_TPC6_KRN_ERR: 7764 case GAUDI_EVENT_TPC7_KRN_ERR: 7765 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7766 reset_required = gaudi_tpc_read_interrupts(hdev, 7767 tpc_krn_event_to_tpc_id(event_type), 7768 "KRN_ERR"); 7769 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7770 if (reset_required) { 7771 dev_err(hdev->dev, "reset required due to %s\n", 7772 gaudi_irq_map_table[event_type].name); 7773 7774 reset_direct = true; 7775 goto reset_device; 7776 } else { 7777 hl_fw_unmask_irq(hdev, event_type); 7778 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7779 } 7780 break; 7781 7782 case GAUDI_EVENT_PCIE_CORE_SERR: 7783 case GAUDI_EVENT_PCIE_IF_SERR: 7784 case GAUDI_EVENT_PCIE_PHY_SERR: 7785 case GAUDI_EVENT_TPC0_SERR ... 
GAUDI_EVENT_TPC7_SERR: 7786 case GAUDI_EVENT_MME0_ACC_SERR: 7787 case GAUDI_EVENT_MME0_SBAB_SERR: 7788 case GAUDI_EVENT_MME1_ACC_SERR: 7789 case GAUDI_EVENT_MME1_SBAB_SERR: 7790 case GAUDI_EVENT_MME2_ACC_SERR: 7791 case GAUDI_EVENT_MME2_SBAB_SERR: 7792 case GAUDI_EVENT_MME3_ACC_SERR: 7793 case GAUDI_EVENT_MME3_SBAB_SERR: 7794 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: 7795 case GAUDI_EVENT_CPU_IF_ECC_SERR: 7796 case GAUDI_EVENT_PSOC_MEM_SERR: 7797 case GAUDI_EVENT_PSOC_CORESIGHT_SERR: 7798 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: 7799 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: 7800 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: 7801 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: 7802 fallthrough; 7803 case GAUDI_EVENT_MMU_SERR: 7804 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7805 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7806 hl_fw_unmask_irq(hdev, event_type); 7807 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7808 break; 7809 7810 case GAUDI_EVENT_PCIE_DEC: 7811 case GAUDI_EVENT_CPU_AXI_SPLITTER: 7812 case GAUDI_EVENT_PSOC_AXI_DEC: 7813 case GAUDI_EVENT_PSOC_PRSTN_FALL: 7814 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7815 hl_fw_unmask_irq(hdev, event_type); 7816 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7817 break; 7818 7819 case GAUDI_EVENT_MMU_PAGE_FAULT: 7820 case GAUDI_EVENT_MMU_WR_PERM: 7821 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7822 hl_fw_unmask_irq(hdev, event_type); 7823 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7824 break; 7825 7826 case GAUDI_EVENT_MME0_WBC_RSP: 7827 case GAUDI_EVENT_MME0_SBAB0_RSP: 7828 case GAUDI_EVENT_MME1_WBC_RSP: 7829 case GAUDI_EVENT_MME1_SBAB0_RSP: 7830 case GAUDI_EVENT_MME2_WBC_RSP: 7831 case GAUDI_EVENT_MME2_SBAB0_RSP: 7832 case GAUDI_EVENT_MME3_WBC_RSP: 7833 case GAUDI_EVENT_MME3_SBAB0_RSP: 7834 case GAUDI_EVENT_RAZWI_OR_ADC: 7835 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7836 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: 7837 fallthrough; 7838 case GAUDI_EVENT_NIC0_QM0: 7839 case GAUDI_EVENT_NIC0_QM1: 7840 case GAUDI_EVENT_NIC1_QM0: 7841 case GAUDI_EVENT_NIC1_QM1: 7842 case GAUDI_EVENT_NIC2_QM0: 7843 case GAUDI_EVENT_NIC2_QM1: 7844 case GAUDI_EVENT_NIC3_QM0: 7845 case GAUDI_EVENT_NIC3_QM1: 7846 case GAUDI_EVENT_NIC4_QM0: 7847 case GAUDI_EVENT_NIC4_QM1: 7848 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: 7849 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7850 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7851 gaudi_handle_qman_err(hdev, event_type, &event_mask); 7852 hl_fw_unmask_irq(hdev, event_type); 7853 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); 7854 break; 7855 7856 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7857 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7858 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7859 goto reset_device; 7860 7861 case GAUDI_EVENT_TPC0_BMON_SPMU: 7862 case GAUDI_EVENT_TPC1_BMON_SPMU: 7863 case GAUDI_EVENT_TPC2_BMON_SPMU: 7864 case GAUDI_EVENT_TPC3_BMON_SPMU: 7865 case GAUDI_EVENT_TPC4_BMON_SPMU: 7866 case GAUDI_EVENT_TPC5_BMON_SPMU: 7867 case GAUDI_EVENT_TPC6_BMON_SPMU: 7868 case GAUDI_EVENT_TPC7_BMON_SPMU: 7869 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7870 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7871 hl_fw_unmask_irq(hdev, event_type); 7872 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7873 break; 7874 7875 case GAUDI_EVENT_NIC_SEI_0 ... 
GAUDI_EVENT_NIC_SEI_4: 7876 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7877 hl_fw_unmask_irq(hdev, event_type); 7878 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7879 break; 7880 7881 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: 7882 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7883 gaudi_print_sm_sei_info(hdev, event_type, 7884 &eq_entry->sm_sei_data); 7885 rc = hl_state_dump(hdev); 7886 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7887 if (rc) 7888 dev_err(hdev->dev, 7889 "Error during system state dump %d\n", rc); 7890 hl_fw_unmask_irq(hdev, event_type); 7891 break; 7892 7893 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: 7894 break; 7895 7896 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: 7897 gaudi_print_clk_change_info(hdev, event_type, &event_mask); 7898 hl_fw_unmask_irq(hdev, event_type); 7899 break; 7900 7901 case GAUDI_EVENT_PSOC_GPIO_U16_0: 7902 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; 7903 dev_err(hdev->dev, 7904 "Received high temp H/W interrupt %d (cause %d)\n", 7905 event_type, cause); 7906 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7907 break; 7908 7909 case GAUDI_EVENT_DEV_RESET_REQ: 7910 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7911 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7912 goto reset_device; 7913 7914 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7915 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7916 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7917 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7918 goto reset_device; 7919 7920 case GAUDI_EVENT_FW_ALIVE_S: 7921 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7922 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7923 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR; 7924 fw_err_info.event_id = event_type; 7925 fw_err_info.event_mask = &event_mask; 7926 hl_handle_fw_err(hdev, &fw_err_info); 7927 goto reset_device; 7928 7929 default: 7930 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", 7931 event_type); 7932 break; 7933 } 7934 7935 if (event_mask) 7936 hl_notifier_event_send_all(hdev, event_mask); 7937 7938 return; 7939 7940 reset_device: 7941 reset_required = true; 7942 7943 if (hdev->asic_prop.fw_security_enabled && !reset_direct) { 7944 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag; 7945 7946 /* notify on device unavailable while the reset triggered by fw */ 7947 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | 7948 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); 7949 } else if (hdev->hard_reset_on_fw_events) { 7950 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag; 7951 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7952 } else { 7953 reset_required = false; 7954 } 7955 7956 if (reset_required) { 7957 /* escalate general hw errors to critical/fatal error */ 7958 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) 7959 hl_handle_critical_hw_err(hdev, event_type, &event_mask); 7960 7961 hl_device_cond_reset(hdev, flags, event_mask); 7962 } else { 7963 hl_fw_unmask_irq(hdev, event_type); 7964 /* Notification on occurred event needs to be sent although reset is not executed */ 7965 if (event_mask) 7966 hl_notifier_event_send_all(hdev, event_mask); 7967 } 7968 } 7969 7970 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7971 { 7972 struct gaudi_device *gaudi = hdev->asic_specific; 7973 7974 if (aggregate) { 7975 *size = (u32) sizeof(gaudi->events_stat_aggregate); 7976 
return gaudi->events_stat_aggregate; 7977 } 7978 7979 *size = (u32) sizeof(gaudi->events_stat); 7980 return gaudi->events_stat; 7981 } 7982 7983 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 7984 { 7985 struct gaudi_device *gaudi = hdev->asic_specific; 7986 u32 status, timeout_usec; 7987 int rc; 7988 7989 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || 7990 hdev->reset_info.hard_reset_pending) 7991 return 0; 7992 7993 if (hdev->pldm) 7994 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7995 else 7996 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7997 7998 /* L0 & L1 invalidation */ 7999 WREG32(mmSTLB_INV_PS, 3); 8000 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); 8001 WREG32(mmSTLB_INV_PS, 2); 8002 8003 rc = hl_poll_timeout( 8004 hdev, 8005 mmSTLB_INV_PS, 8006 status, 8007 !status, 8008 1000, 8009 timeout_usec); 8010 8011 WREG32(mmSTLB_INV_SET, 0); 8012 8013 return rc; 8014 } 8015 8016 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, 8017 bool is_hard, u32 flags, 8018 u32 asid, u64 va, u64 size) 8019 { 8020 /* Treat as invalidate all because there is no range invalidation 8021 * in Gaudi 8022 */ 8023 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 8024 } 8025 8026 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr) 8027 { 8028 u32 status, timeout_usec; 8029 int rc; 8030 8031 if (hdev->pldm) 8032 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 8033 else 8034 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 8035 8036 WREG32(MMU_ASID, asid); 8037 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 8038 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); 8039 WREG32(MMU_BUSY, 0x80000000); 8040 8041 rc = hl_poll_timeout( 8042 hdev, 8043 MMU_BUSY, 8044 status, 8045 !(status & 0x80000000), 8046 1000, 8047 timeout_usec); 8048 8049 if (rc) { 8050 dev_err(hdev->dev, 8051 "Timeout during MMU hop0 config of asid %d\n", asid); 8052 return rc; 8053 } 8054 8055 return 0; 8056 } 8057 8058 static int gaudi_send_heartbeat(struct hl_device *hdev) 8059 { 8060 struct gaudi_device *gaudi = hdev->asic_specific; 8061 8062 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8063 return 0; 8064 8065 return hl_fw_send_heartbeat(hdev); 8066 } 8067 8068 static int gaudi_cpucp_info_get(struct hl_device *hdev) 8069 { 8070 struct gaudi_device *gaudi = hdev->asic_specific; 8071 struct asic_fixed_properties *prop = &hdev->asic_prop; 8072 int rc; 8073 8074 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8075 return 0; 8076 8077 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, 8078 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 8079 mmCPU_BOOT_ERR1); 8080 if (rc) 8081 return rc; 8082 8083 if (!strlen(prop->cpucp_info.card_name)) 8084 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 8085 CARD_NAME_MAX_LEN); 8086 8087 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); 8088 8089 set_default_power_values(hdev); 8090 8091 return 0; 8092 } 8093 8094 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 8095 struct engines_data *e) 8096 { 8097 struct gaudi_device *gaudi = hdev->asic_specific; 8098 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; 8099 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; 8100 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n"; 8101 unsigned long *mask = (unsigned long *)mask_arr; 8102 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; 8103 bool is_idle = true, is_eng_idle, is_slave; 8104 u64 offset; 8105 int 
i, dma_id, port; 8106 8107 if (e) 8108 hl_engine_data_sprintf(e, 8109 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8110 "--- ------- ------------ ---------- -------------\n"); 8111 8112 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { 8113 dma_id = gaudi_dma_assignment[i]; 8114 offset = dma_id * DMA_QMAN_OFFSET; 8115 8116 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); 8117 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); 8118 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); 8119 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8120 IS_DMA_IDLE(dma_core_sts0); 8121 is_idle &= is_eng_idle; 8122 8123 if (mask && !is_eng_idle) 8124 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8125 if (e) 8126 hl_engine_data_sprintf(e, fmt, dma_id, 8127 is_eng_idle ? "Y" : "N", qm_glbl_sts0, 8128 qm_cgm_sts, dma_core_sts0); 8129 } 8130 8131 if (e) 8132 hl_engine_data_sprintf(e, 8133 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8134 "--- ------- ------------ ---------- ----------\n"); 8135 8136 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 8137 offset = i * TPC_QMAN_OFFSET; 8138 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); 8139 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); 8140 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); 8141 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8142 IS_TPC_IDLE(tpc_cfg_sts); 8143 is_idle &= is_eng_idle; 8144 8145 if (mask && !is_eng_idle) 8146 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8147 if (e) 8148 hl_engine_data_sprintf(e, fmt, i, 8149 is_eng_idle ? "Y" : "N", 8150 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8151 } 8152 8153 if (e) 8154 hl_engine_data_sprintf(e, 8155 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8156 "--- ------- ------------ ---------- -----------\n"); 8157 8158 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { 8159 offset = i * MME_QMAN_OFFSET; 8160 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); 8161 is_eng_idle = IS_MME_IDLE(mme_arch_sts); 8162 8163 /* MME 1 & 3 are slaves, no need to check their QMANs */ 8164 is_slave = i % 2; 8165 if (!is_slave) { 8166 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); 8167 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); 8168 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8169 } 8170 8171 is_idle &= is_eng_idle; 8172 8173 if (mask && !is_eng_idle) 8174 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8175 if (e) { 8176 if (!is_slave) 8177 hl_engine_data_sprintf(e, fmt, i, 8178 is_eng_idle ? "Y" : "N", 8179 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8180 else 8181 hl_engine_data_sprintf(e, mme_slave_fmt, i, 8182 is_eng_idle ? "Y" : "N", "-", 8183 "-", mme_arch_sts); 8184 } 8185 } 8186 8187 if (e) 8188 hl_engine_data_sprintf(e, 8189 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8190 "--- ------- ------------ ----------\n"); 8191 8192 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { 8193 offset = i * NIC_MACRO_QMAN_OFFSET; 8194 port = 2 * i; 8195 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8196 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 8197 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 8198 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8199 is_idle &= is_eng_idle; 8200 8201 if (mask && !is_eng_idle) 8202 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8203 if (e) 8204 hl_engine_data_sprintf(e, nic_fmt, port, 8205 is_eng_idle ? 
"Y" : "N", 8206 qm_glbl_sts0, qm_cgm_sts); 8207 } 8208 8209 port = 2 * i + 1; 8210 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8211 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset); 8212 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset); 8213 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8214 is_idle &= is_eng_idle; 8215 8216 if (mask && !is_eng_idle) 8217 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8218 if (e) 8219 hl_engine_data_sprintf(e, nic_fmt, port, 8220 is_eng_idle ? "Y" : "N", 8221 qm_glbl_sts0, qm_cgm_sts); 8222 } 8223 } 8224 8225 if (e) 8226 hl_engine_data_sprintf(e, "\n"); 8227 8228 return is_idle; 8229 } 8230 8231 static void gaudi_hw_queues_lock(struct hl_device *hdev) 8232 __acquires(&gaudi->hw_queues_lock) 8233 { 8234 struct gaudi_device *gaudi = hdev->asic_specific; 8235 8236 spin_lock(&gaudi->hw_queues_lock); 8237 } 8238 8239 static void gaudi_hw_queues_unlock(struct hl_device *hdev) 8240 __releases(&gaudi->hw_queues_lock) 8241 { 8242 struct gaudi_device *gaudi = hdev->asic_specific; 8243 8244 spin_unlock(&gaudi->hw_queues_lock); 8245 } 8246 8247 static u32 gaudi_get_pci_id(struct hl_device *hdev) 8248 { 8249 return hdev->pdev->device; 8250 } 8251 8252 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, 8253 size_t max_size) 8254 { 8255 struct gaudi_device *gaudi = hdev->asic_specific; 8256 8257 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8258 return 0; 8259 8260 return hl_fw_get_eeprom_data(hdev, data, max_size); 8261 } 8262 8263 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) 8264 { 8265 struct gaudi_device *gaudi = hdev->asic_specific; 8266 8267 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8268 return 0; 8269 8270 return hl_fw_get_monitor_dump(hdev, data); 8271 } 8272 8273 /* 8274 * this function should be used only during initialization and/or after reset, 8275 * when there are no active users. 
8276 */ 8277 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) 8278 { 8279 u64 kernel_timeout; 8280 u32 status, offset; 8281 int rc; 8282 8283 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); 8284 8285 if (hdev->pldm) 8286 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; 8287 else 8288 kernel_timeout = HL_DEVICE_TIMEOUT_USEC; 8289 8290 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, 8291 lower_32_bits(tpc_kernel)); 8292 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, 8293 upper_32_bits(tpc_kernel)); 8294 8295 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, 8296 lower_32_bits(tpc_kernel)); 8297 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, 8298 upper_32_bits(tpc_kernel)); 8299 /* set a valid LUT pointer, content is of no significance */ 8300 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, 8301 lower_32_bits(tpc_kernel)); 8302 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, 8303 upper_32_bits(tpc_kernel)); 8304 8305 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, 8306 lower_32_bits(CFG_BASE + 8307 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); 8308 8309 WREG32(mmTPC0_CFG_TPC_CMD + offset, 8310 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | 8311 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); 8312 /* wait a bit for the engine to start executing */ 8313 usleep_range(1000, 1500); 8314 8315 /* wait until engine has finished executing */ 8316 rc = hl_poll_timeout( 8317 hdev, 8318 mmTPC0_CFG_STATUS + offset, 8319 status, 8320 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8321 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8322 1000, 8323 kernel_timeout); 8324 8325 if (rc) { 8326 dev_err(hdev->dev, 8327 "Timeout while waiting for TPC%d icache prefetch\n", 8328 tpc_id); 8329 return -EIO; 8330 } 8331 8332 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, 8333 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); 8334 8335 /* wait a bit for the engine to start executing */ 8336 usleep_range(1000, 1500); 8337 8338 /* wait until engine has finished executing */ 8339 rc = hl_poll_timeout( 8340 hdev, 8341 mmTPC0_CFG_STATUS + offset, 8342 status, 8343 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8344 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8345 1000, 8346 kernel_timeout); 8347 8348 if (rc) { 8349 dev_err(hdev->dev, 8350 "Timeout while waiting for TPC%d vector pipe\n", 8351 tpc_id); 8352 return -EIO; 8353 } 8354 8355 rc = hl_poll_timeout( 8356 hdev, 8357 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, 8358 status, 8359 (status == 0), 8360 1000, 8361 kernel_timeout); 8362 8363 if (rc) { 8364 dev_err(hdev->dev, 8365 "Timeout while waiting for TPC%d kernel to execute\n", 8366 tpc_id); 8367 return -EIO; 8368 } 8369 8370 return 0; 8371 } 8372 8373 static int gaudi_internal_cb_pool_init(struct hl_device *hdev, 8374 struct hl_ctx *ctx) 8375 { 8376 struct gaudi_device *gaudi = hdev->asic_specific; 8377 int min_alloc_order, rc, collective_cb_size; 8378 8379 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8380 return 0; 8381 8382 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 8383 HOST_SPACE_INTERNAL_CB_SZ, 8384 &hdev->internal_cb_pool_dma_addr, 8385 GFP_KERNEL | __GFP_ZERO); 8386 8387 if (!hdev->internal_cb_pool_virt_addr) 8388 return -ENOMEM; 8389 8390 collective_cb_size = sizeof(struct packet_msg_short) * 5 + 8391 sizeof(struct packet_fence); 8392 min_alloc_order = ilog2(collective_cb_size); 8393 8394 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 8395 if (!hdev->internal_cb_pool) { 8396 dev_err(hdev->dev, 8397 "Failed to 
create internal CB pool\n"); 8398 rc = -ENOMEM; 8399 goto free_internal_cb_pool; 8400 } 8401 8402 rc = gen_pool_add(hdev->internal_cb_pool, 8403 (uintptr_t) hdev->internal_cb_pool_virt_addr, 8404 HOST_SPACE_INTERNAL_CB_SZ, -1); 8405 if (rc) { 8406 dev_err(hdev->dev, 8407 "Failed to add memory to internal CB pool\n"); 8408 rc = -EFAULT; 8409 goto destroy_internal_cb_pool; 8410 } 8411 8412 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, 8413 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ, 8414 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 8415 8416 if (!hdev->internal_cb_va_base) { 8417 rc = -ENOMEM; 8418 goto destroy_internal_cb_pool; 8419 } 8420 8421 mutex_lock(&hdev->mmu_lock); 8422 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8423 hdev->internal_cb_pool_dma_addr, 8424 HOST_SPACE_INTERNAL_CB_SZ); 8425 8426 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8427 mutex_unlock(&hdev->mmu_lock); 8428 8429 if (rc) 8430 goto unreserve_internal_cb_pool; 8431 8432 return 0; 8433 8434 unreserve_internal_cb_pool: 8435 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8436 HOST_SPACE_INTERNAL_CB_SZ); 8437 destroy_internal_cb_pool: 8438 gen_pool_destroy(hdev->internal_cb_pool); 8439 free_internal_cb_pool: 8440 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8441 hdev->internal_cb_pool_dma_addr); 8442 8443 return rc; 8444 } 8445 8446 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, 8447 struct hl_ctx *ctx) 8448 { 8449 struct gaudi_device *gaudi = hdev->asic_specific; 8450 8451 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8452 return; 8453 8454 mutex_lock(&hdev->mmu_lock); 8455 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8456 HOST_SPACE_INTERNAL_CB_SZ); 8457 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8458 HOST_SPACE_INTERNAL_CB_SZ); 8459 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8460 mutex_unlock(&hdev->mmu_lock); 8461 8462 gen_pool_destroy(hdev->internal_cb_pool); 8463 8464 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8465 hdev->internal_cb_pool_dma_addr); 8466 } 8467 8468 static int gaudi_ctx_init(struct hl_ctx *ctx) 8469 { 8470 int rc; 8471 8472 if (ctx->asid == HL_KERNEL_ASID_ID) 8473 return 0; 8474 8475 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); 8476 if (rc) 8477 return rc; 8478 8479 rc = gaudi_restore_user_registers(ctx->hdev); 8480 if (rc) 8481 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8482 8483 return rc; 8484 } 8485 8486 static void gaudi_ctx_fini(struct hl_ctx *ctx) 8487 { 8488 if (ctx->asid == HL_KERNEL_ASID_ID) 8489 return; 8490 8491 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8492 } 8493 8494 static int gaudi_pre_schedule_cs(struct hl_cs *cs) 8495 { 8496 return 0; 8497 } 8498 8499 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 8500 { 8501 return gaudi_cq_assignment[cq_idx]; 8502 } 8503 8504 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) 8505 { 8506 return sizeof(struct packet_msg_short) + 8507 sizeof(struct packet_msg_prot) * 2; 8508 } 8509 8510 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) 8511 { 8512 return sizeof(struct packet_msg_short) * 4 + 8513 sizeof(struct packet_fence) + 8514 sizeof(struct packet_msg_prot) * 2; 8515 } 8516 8517 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) 8518 { 8519 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); 8520 } 8521 8522 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void 
*data, u16 sob_id, 8523 u32 size, bool eb) 8524 { 8525 struct hl_cb *cb = (struct hl_cb *) data; 8526 struct packet_msg_short *pkt; 8527 u32 value, ctl, pkt_size = sizeof(*pkt); 8528 8529 pkt = cb->kernel_address + size; 8530 memset(pkt, 0, pkt_size); 8531 8532 /* Inc by 1, Mode ADD */ 8533 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 8534 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 8535 8536 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 8537 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8538 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ 8539 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8540 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb); 8541 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8542 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8543 8544 pkt->value = cpu_to_le32(value); 8545 pkt->ctl = cpu_to_le32(ctl); 8546 8547 return size + pkt_size; 8548 } 8549 8550 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, 8551 u16 addr) 8552 { 8553 u32 ctl, pkt_size = sizeof(*pkt); 8554 8555 memset(pkt, 0, pkt_size); 8556 8557 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); 8558 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8559 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8560 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8561 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8562 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */ 8563 8564 pkt->value = cpu_to_le32(value); 8565 pkt->ctl = cpu_to_le32(ctl); 8566 8567 return pkt_size; 8568 } 8569 8570 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev, 8571 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask, 8572 u16 sob_val, u16 mon_id) 8573 { 8574 u64 monitor_base; 8575 u32 ctl, value, pkt_size = sizeof(*pkt); 8576 u16 msg_addr_offset; 8577 u8 mask; 8578 8579 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 8580 dev_err(hdev->dev, 8581 "sob_base %u (mask %#x) is not valid\n", 8582 sob_base, sob_mask); 8583 return 0; 8584 } 8585 8586 /* 8587 * monitor_base should be the content of the base0 address registers, 8588 * so it will be added to the msg short offsets 8589 */ 8590 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8591 8592 msg_addr_offset = 8593 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - 8594 monitor_base; 8595 8596 memset(pkt, 0, pkt_size); 8597 8598 /* Monitor config packet: bind the monitor to a sync object */ 8599 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 8600 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 8601 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, 8602 0); /* GREATER OR EQUAL*/ 8603 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); 8604 8605 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset); 8606 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8607 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8608 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8609 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8610 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8611 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8612 8613 pkt->value = cpu_to_le32(value); 8614 pkt->ctl = cpu_to_le32(ctl); 8615 8616 return pkt_size; 8617 } 8618 8619 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) 8620 { 8621 u32 ctl, cfg, pkt_size = sizeof(*pkt); 8622 8623 
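	/*
	 * Fence on fence-counter ID 2: wait for it to reach a target value of 1,
	 * then decrement it by 1. In a wait CB the armed monitor releases this
	 * fence by writing to the queue's CP_FENCE2_RDATA register
	 * (see gaudi_gen_wait_cb() and gaudi_get_fence_addr()).
	 */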
memset(pkt, 0, pkt_size); 8624 8625 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 8626 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 8627 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2); 8628 8629 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 8630 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8631 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8632 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8633 8634 pkt->cfg = cpu_to_le32(cfg); 8635 pkt->ctl = cpu_to_le32(ctl); 8636 8637 return pkt_size; 8638 } 8639 8640 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr) 8641 { 8642 u32 offset, nic_index; 8643 8644 switch (queue_id) { 8645 case GAUDI_QUEUE_ID_DMA_0_0: 8646 offset = mmDMA0_QM_CP_FENCE2_RDATA_0; 8647 break; 8648 case GAUDI_QUEUE_ID_DMA_0_1: 8649 offset = mmDMA0_QM_CP_FENCE2_RDATA_1; 8650 break; 8651 case GAUDI_QUEUE_ID_DMA_0_2: 8652 offset = mmDMA0_QM_CP_FENCE2_RDATA_2; 8653 break; 8654 case GAUDI_QUEUE_ID_DMA_0_3: 8655 offset = mmDMA0_QM_CP_FENCE2_RDATA_3; 8656 break; 8657 case GAUDI_QUEUE_ID_DMA_1_0: 8658 offset = mmDMA1_QM_CP_FENCE2_RDATA_0; 8659 break; 8660 case GAUDI_QUEUE_ID_DMA_1_1: 8661 offset = mmDMA1_QM_CP_FENCE2_RDATA_1; 8662 break; 8663 case GAUDI_QUEUE_ID_DMA_1_2: 8664 offset = mmDMA1_QM_CP_FENCE2_RDATA_2; 8665 break; 8666 case GAUDI_QUEUE_ID_DMA_1_3: 8667 offset = mmDMA1_QM_CP_FENCE2_RDATA_3; 8668 break; 8669 case GAUDI_QUEUE_ID_DMA_5_0: 8670 offset = mmDMA5_QM_CP_FENCE2_RDATA_0; 8671 break; 8672 case GAUDI_QUEUE_ID_DMA_5_1: 8673 offset = mmDMA5_QM_CP_FENCE2_RDATA_1; 8674 break; 8675 case GAUDI_QUEUE_ID_DMA_5_2: 8676 offset = mmDMA5_QM_CP_FENCE2_RDATA_2; 8677 break; 8678 case GAUDI_QUEUE_ID_DMA_5_3: 8679 offset = mmDMA5_QM_CP_FENCE2_RDATA_3; 8680 break; 8681 case GAUDI_QUEUE_ID_TPC_7_0: 8682 offset = mmTPC7_QM_CP_FENCE2_RDATA_0; 8683 break; 8684 case GAUDI_QUEUE_ID_TPC_7_1: 8685 offset = mmTPC7_QM_CP_FENCE2_RDATA_1; 8686 break; 8687 case GAUDI_QUEUE_ID_TPC_7_2: 8688 offset = mmTPC7_QM_CP_FENCE2_RDATA_2; 8689 break; 8690 case GAUDI_QUEUE_ID_TPC_7_3: 8691 offset = mmTPC7_QM_CP_FENCE2_RDATA_3; 8692 break; 8693 case GAUDI_QUEUE_ID_NIC_0_0: 8694 case GAUDI_QUEUE_ID_NIC_1_0: 8695 case GAUDI_QUEUE_ID_NIC_2_0: 8696 case GAUDI_QUEUE_ID_NIC_3_0: 8697 case GAUDI_QUEUE_ID_NIC_4_0: 8698 case GAUDI_QUEUE_ID_NIC_5_0: 8699 case GAUDI_QUEUE_ID_NIC_6_0: 8700 case GAUDI_QUEUE_ID_NIC_7_0: 8701 case GAUDI_QUEUE_ID_NIC_8_0: 8702 case GAUDI_QUEUE_ID_NIC_9_0: 8703 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2; 8704 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 + 8705 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8706 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8707 break; 8708 case GAUDI_QUEUE_ID_NIC_0_1: 8709 case GAUDI_QUEUE_ID_NIC_1_1: 8710 case GAUDI_QUEUE_ID_NIC_2_1: 8711 case GAUDI_QUEUE_ID_NIC_3_1: 8712 case GAUDI_QUEUE_ID_NIC_4_1: 8713 case GAUDI_QUEUE_ID_NIC_5_1: 8714 case GAUDI_QUEUE_ID_NIC_6_1: 8715 case GAUDI_QUEUE_ID_NIC_7_1: 8716 case GAUDI_QUEUE_ID_NIC_8_1: 8717 case GAUDI_QUEUE_ID_NIC_9_1: 8718 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2; 8719 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 + 8720 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8721 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8722 break; 8723 case GAUDI_QUEUE_ID_NIC_0_2: 8724 case GAUDI_QUEUE_ID_NIC_1_2: 8725 case GAUDI_QUEUE_ID_NIC_2_2: 8726 case GAUDI_QUEUE_ID_NIC_3_2: 8727 case GAUDI_QUEUE_ID_NIC_4_2: 8728 case GAUDI_QUEUE_ID_NIC_5_2: 8729 case GAUDI_QUEUE_ID_NIC_6_2: 8730 case GAUDI_QUEUE_ID_NIC_7_2: 8731 case GAUDI_QUEUE_ID_NIC_8_2: 8732 case 
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
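/*
 * Emit the three MSG_SHORT packets that configure a sync manager monitor:
 * the low and high 32 bits of the payload address (the fence register
 * returned by gaudi_get_fence_addr()) and the payload data, which is set to
 * 1 to match the target value of the FENCE packet built in
 * gaudi_add_fence_pkt().
 */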
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
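/*
 * Generate a wait CB at offset prop->size of the CB buffer. The resulting
 * packet stream is:
 *   1-2. MSG_SHORT - monitor payload address, low then high 32 bits
 *   3.   MSG_SHORT - monitor payload data (1)
 *   4.   MSG_SHORT - arm the monitor on the requested SOB group/value/mask
 *   5.   FENCE     - stall the stream until the monitor payload arrives
 * Returns the new used size of the CB, or 0 if the queue ID has no fence
 * address mapping.
 */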
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
						hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}
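/*
 * Build the sync-object-to-engine map used by the state dump: read the
 * configured SO address from every TPC, MME (including its sub-engines) and
 * DMA QMAN and hash each value, so a monitored sync ID can later be traced
 * back to the engine that signals it.
 */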
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}

static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}
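/*
 * Dump the fence state of a single engine's QMAN. For every stream whose CP
 * status reports a fence in progress, compute the CFG-space addresses of the
 * corresponding CP_FENCEx_CNT_y and CP_FENCEx_RDATA_y registers from the
 * engine's base offsets and print the counter value alongside the raw CP
 * status.
 */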
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}

static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
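/*
 * ASIC function table wired up by gaudi_set_asic_funcs(). Callbacks that
 * Gaudi does not implement are either NULL (e.g. MMU prefetch, decoder base
 * address) or the stubs above that simply return 0, -EPERM or -EINVAL.
 */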
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}