// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is never
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000	/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1	/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200	/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000	/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9

#define GAUDI_MAX_STRING_LEN		20

#define GAUDI_CB_POOL_CB_CNT		512
#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
	"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
	"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
	"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
	"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32] = sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
	[PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
	[PACKET_REPEAT] = sizeof(struct packet_repeat),
	[PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
	[PACKET_FENCE] = sizeof(struct packet_fence),
	[PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
	[PACKET_NOP] = sizeof(struct packet_nop),
	[PACKET_STOP] = sizeof(struct packet_stop),
	[PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
	[PACKET_WAIT] = sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
};
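/*
 * Whitelist of the packet IDs the driver recognizes; any other opcode found
 * while parsing a command buffer is rejected.
 */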
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};

static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};

static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};

static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
		u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
		struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
		u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
		u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
		u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
		u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop);
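
/*
 * Collective-role mapping: external (PCI DMA) queues act as collective
 * masters, the DMA5, TPC7 and NIC queues act as collective slaves, and any
 * other queue does not take part in collective operations.
 */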
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

		if (prop->fw_security_enabled)
			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
		else
			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

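/*
 * Fill the fixed (compile-time) ASIC properties: per-queue capabilities,
 * SOB/monitor reservations for the sync-stream and collective flows, the
 * memory map and the MMU geometry.
 */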
static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
			get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
		CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	/* single msi */
	prop->eq_interrupt_id = 0;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

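/*
 * Static iATU setup, used only when the firmware has not configured it:
 * three BAR-match inbound regions (SRAM+CFG, SPI flash and HBM) and a single
 * outbound region that exposes host memory to the device.
 */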
static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so at this stage
		 * the decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already in a failure flow, so don't check whether hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if ((hdev->fw_components & FW_TYPE_LINUX) &&
			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

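/*
 * Copy the TPC kernel from host memory to an 8KB-aligned SRAM address using
 * a single LIN_DMA packet on QMAN0, then run the kernel on all TPC engines.
 */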
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
	release_firmware(fw);
	return rc;
}

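/*
 * Map the currently active SOB group of the given stream onto the collective
 * slave queues: one SOB per NIC queue, plus one shared SOB for the DMA5/TPC7
 * reduction queues.
 */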
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

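/*
 * Prepare a collective-wait CS: take a reference on the signal SOB (unless
 * the signal CS already completed), generate the master/slave wait CBs for
 * all jobs, and advance the stream's SOB group value with wraparound.
 */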
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before starting to go over the
		 * jobs of the master/slaves; the sob_value will be taken
		 * later on in gaudi_collective_slave_init_job, depending on
		 * each job's wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevents the hw_sob refcount value,
	 * which is changed by the signal/wait flows, from going
	 * out of sync.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

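/*
 * A patched CB gets two MSG_PROT packets appended (completion signal + MSI);
 * when those packets would cross a device cache line, the CB is first padded
 * up to the next cache-line boundary. Return the extra bytes needed on top
 * of the user CB.
 */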
static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 cacheline_end, additional_commands;

	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
	additional_commands = sizeof(struct packet_msg_prot) * 2;

	if (user_cb_size + additional_commands > cacheline_end)
		return cacheline_end - user_cb_size + additional_commands;
	else
		return additional_commands;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since it's guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need for parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons: we no longer need the
	 * CB in the CB IDR, and we need to decrement its refcount as it
	 * was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues, which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}

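/*
 * Late initialization, called once the FW CPU queues are up: read cpucp
 * info, enable PCI access from the device CPU, scrub SRAM/DRAM, clear the
 * MMU page tables, load the TPC memories and set up the collective
 * infrastructure.
 */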
1577 */ 1578 for (i = 0 ; i < num_jobs ; i++) { 1579 if (i == 0) { 1580 queue_id = wait_queue_id; 1581 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1582 HL_COLLECTIVE_MASTER, queue_id, 1583 wait_queue_id, encaps_signal_offset); 1584 } else { 1585 if (nic_idx < NIC_NUMBER_OF_ENGINES) { 1586 if (gaudi->hw_cap_initialized & 1587 BIT(HW_CAP_NIC_SHIFT + nic_idx)) 1588 skip = false; 1589 else 1590 skip = true; 1591 1592 queue_id = nic_queue; 1593 nic_queue += 4; 1594 nic_idx++; 1595 1596 if (skip) 1597 continue; 1598 } else { 1599 queue_id = collective_queue; 1600 } 1601 1602 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1603 HL_COLLECTIVE_SLAVE, queue_id, 1604 wait_queue_id, encaps_signal_offset); 1605 } 1606 1607 if (rc) 1608 return rc; 1609 } 1610 1611 return rc; 1612 } 1613 1614 static int gaudi_late_init(struct hl_device *hdev) 1615 { 1616 struct gaudi_device *gaudi = hdev->asic_specific; 1617 int rc; 1618 1619 rc = gaudi->cpucp_info_get(hdev); 1620 if (rc) { 1621 dev_err(hdev->dev, "Failed to get cpucp info\n"); 1622 return rc; 1623 } 1624 1625 if ((hdev->card_type == cpucp_card_type_pci) && 1626 (hdev->nic_ports_mask & 0x3)) { 1627 dev_info(hdev->dev, 1628 "PCI card detected, only 8 ports are enabled\n"); 1629 hdev->nic_ports_mask &= ~0x3; 1630 1631 /* Stop and disable unused NIC QMANs */ 1632 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1633 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1634 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1635 1636 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1637 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1638 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1639 1640 WREG32(mmNIC0_QM0_GLBL_CFG0, 0); 1641 WREG32(mmNIC0_QM1_GLBL_CFG0, 0); 1642 1643 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1); 1644 } 1645 1646 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0); 1647 if (rc) { 1648 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 1649 return rc; 1650 } 1651 1652 /* Scrub both SRAM and DRAM */ 1653 rc = hdev->asic_funcs->scrub_device_mem(hdev); 1654 if (rc) 1655 goto disable_pci_access; 1656 1657 rc = gaudi_fetch_psoc_frequency(hdev); 1658 if (rc) { 1659 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 1660 goto disable_pci_access; 1661 } 1662 1663 rc = gaudi_mmu_clear_pgt_range(hdev); 1664 if (rc) { 1665 dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); 1666 goto disable_pci_access; 1667 } 1668 1669 rc = gaudi_init_tpc_mem(hdev); 1670 if (rc) { 1671 dev_err(hdev->dev, "Failed to initialize TPC memories\n"); 1672 goto disable_pci_access; 1673 } 1674 1675 rc = gaudi_collective_init(hdev); 1676 if (rc) { 1677 dev_err(hdev->dev, "Failed to init collective\n"); 1678 goto disable_pci_access; 1679 } 1680 1681 /* We only support a single ASID for the user, so for the sake of optimization, just 1682 * initialize the ASID one time during device initialization with the fixed value of 1 1683 */ 1684 gaudi_mmu_prepare(hdev, 1); 1685 1686 hl_fw_set_pll_profile(hdev); 1687 1688 return 0; 1689 1690 disable_pci_access: 1691 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 1692 1693 return rc; 1694 } 1695 1696 static void gaudi_late_fini(struct hl_device *hdev) 1697 { 1698 hl_hwmon_release_resources(hdev); 1699 } 1700 1701 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 1702 { 1703 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 1704 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}; 1705 int i, j, rc = 0; 1706 1707 /* 1708 * 
The device CPU works with 40-bit addresses, and bit 39 must be set 1709 * to '1' when accessing the host. 1710 * Bits 49:39 of the full host address are saved for a later 1711 * configuration of the HW to perform extension to 50 bits. 1712 * Because there is a single HW register that holds the extension bits, 1713 * these bits must be identical across the entire allocated range. 1714 */ 1715 1716 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 1717 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 1718 &dma_addr_arr[i], 1719 GFP_KERNEL | __GFP_ZERO); 1720 if (!virt_addr_arr[i]) { 1721 rc = -ENOMEM; 1722 goto free_dma_mem_arr; 1723 } 1724 1725 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 1726 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == 1727 GAUDI_CPU_PCI_MSB_ADDR(end_addr)) 1728 break; 1729 } 1730 1731 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { 1732 dev_err(hdev->dev, 1733 "MSB of CPU accessible DMA memory is not identical across the entire range\n"); 1734 rc = -EFAULT; 1735 goto free_dma_mem_arr; 1736 } 1737 1738 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 1739 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 1740 hdev->cpu_pci_msb_addr = 1741 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); 1742 1743 if (!hdev->asic_prop.fw_security_enabled) 1744 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); 1745 1746 free_dma_mem_arr: 1747 for (j = 0 ; j < i ; j++) 1748 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 1749 dma_addr_arr[j]); 1750 1751 return rc; 1752 } 1753 1754 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev) 1755 { 1756 struct gaudi_device *gaudi = hdev->asic_specific; 1757 struct gaudi_internal_qman_info *q; 1758 u32 i; 1759 1760 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1761 q = &gaudi->internal_qmans[i]; 1762 if (!q->pq_kernel_addr) 1763 continue; 1764 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr); 1765 } 1766 } 1767 1768 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev) 1769 { 1770 struct gaudi_device *gaudi = hdev->asic_specific; 1771 struct gaudi_internal_qman_info *q; 1772 int rc, i; 1773 1774 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1775 if (gaudi_queue_type[i] != QUEUE_TYPE_INT) 1776 continue; 1777 1778 q = &gaudi->internal_qmans[i]; 1779 1780 switch (i) { 1781 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3: 1782 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES; 1783 break; 1784 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3: 1785 q->pq_size = MME_QMAN_SIZE_IN_BYTES; 1786 break; 1787 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3: 1788 q->pq_size = TPC_QMAN_SIZE_IN_BYTES; 1789 break; 1790 case GAUDI_QUEUE_ID_NIC_0_0 ...
GAUDI_QUEUE_ID_NIC_9_3: 1791 q->pq_size = NIC_QMAN_SIZE_IN_BYTES; 1792 break; 1793 default: 1794 dev_err(hdev->dev, "Bad internal queue index %d", i); 1795 rc = -EINVAL; 1796 goto free_internal_qmans_pq_mem; 1797 } 1798 1799 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr, 1800 GFP_KERNEL | __GFP_ZERO); 1801 if (!q->pq_kernel_addr) { 1802 rc = -ENOMEM; 1803 goto free_internal_qmans_pq_mem; 1804 } 1805 } 1806 1807 return 0; 1808 1809 free_internal_qmans_pq_mem: 1810 gaudi_free_internal_qmans_pq_mem(hdev); 1811 return rc; 1812 } 1813 1814 static void gaudi_set_pci_memory_regions(struct hl_device *hdev) 1815 { 1816 struct asic_fixed_properties *prop = &hdev->asic_prop; 1817 struct pci_mem_region *region; 1818 1819 /* CFG */ 1820 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 1821 region->region_base = CFG_BASE; 1822 region->region_size = CFG_SIZE; 1823 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR; 1824 region->bar_size = CFG_BAR_SIZE; 1825 region->bar_id = CFG_BAR_ID; 1826 region->used = 1; 1827 1828 /* SRAM */ 1829 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 1830 region->region_base = SRAM_BASE_ADDR; 1831 region->region_size = SRAM_SIZE; 1832 region->offset_in_bar = 0; 1833 region->bar_size = SRAM_BAR_SIZE; 1834 region->bar_id = SRAM_BAR_ID; 1835 region->used = 1; 1836 1837 /* DRAM */ 1838 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 1839 region->region_base = DRAM_PHYS_BASE; 1840 region->region_size = hdev->asic_prop.dram_size; 1841 region->offset_in_bar = 0; 1842 region->bar_size = prop->dram_pci_bar_size; 1843 region->bar_id = HBM_BAR_ID; 1844 region->used = 1; 1845 1846 /* SP SRAM */ 1847 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM]; 1848 region->region_base = PSOC_SCRATCHPAD_ADDR; 1849 region->region_size = PSOC_SCRATCHPAD_SIZE; 1850 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR; 1851 region->bar_size = CFG_BAR_SIZE; 1852 region->bar_id = CFG_BAR_ID; 1853 region->used = 1; 1854 } 1855 1856 static int gaudi_sw_init(struct hl_device *hdev) 1857 { 1858 struct gaudi_device *gaudi; 1859 u32 i, event_id = 0; 1860 int rc; 1861 1862 /* Allocate device structure */ 1863 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 1864 if (!gaudi) 1865 return -ENOMEM; 1866 1867 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 1868 if (gaudi_irq_map_table[i].valid) { 1869 if (event_id == GAUDI_EVENT_SIZE) { 1870 dev_err(hdev->dev, 1871 "Event array exceeds the limit of %u events\n", 1872 GAUDI_EVENT_SIZE); 1873 rc = -EINVAL; 1874 goto free_gaudi_device; 1875 } 1876 1877 gaudi->events[event_id++] = 1878 gaudi_irq_map_table[i].fc_id; 1879 } 1880 } 1881 1882 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1883 1884 hdev->asic_specific = gaudi; 1885 1886 /* Create DMA pool for small allocations */ 1887 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1888 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1889 if (!hdev->dma_pool) { 1890 dev_err(hdev->dev, "failed to create DMA pool\n"); 1891 rc = -ENOMEM; 1892 goto free_gaudi_device; 1893 } 1894 1895 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1896 if (rc) 1897 goto free_dma_pool; 1898 1899 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1900 if (!hdev->cpu_accessible_dma_pool) { 1901 dev_err(hdev->dev, 1902 "Failed to create CPU accessible DMA pool\n"); 1903 rc = -ENOMEM; 1904 goto free_cpu_dma_mem; 1905 } 1906 1907 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1908 (uintptr_t) hdev->cpu_accessible_dma_mem, 1909 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1910 if 
(rc) { 1911 dev_err(hdev->dev, 1912 "Failed to add memory to CPU accessible DMA pool\n"); 1913 rc = -EFAULT; 1914 goto free_cpu_accessible_dma_pool; 1915 } 1916 1917 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1918 if (rc) 1919 goto free_cpu_accessible_dma_pool; 1920 1921 spin_lock_init(&gaudi->hw_queues_lock); 1922 1923 hdev->supports_sync_stream = true; 1924 hdev->supports_coresight = true; 1925 hdev->supports_staged_submission = true; 1926 hdev->supports_wait_for_multi_cs = true; 1927 1928 hdev->asic_funcs->set_pci_memory_regions(hdev); 1929 hdev->stream_master_qid_arr = 1930 hdev->asic_funcs->get_stream_master_qid_arr(); 1931 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; 1932 1933 return 0; 1934 1935 free_cpu_accessible_dma_pool: 1936 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1937 free_cpu_dma_mem: 1938 if (!hdev->asic_prop.fw_security_enabled) 1939 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1940 hdev->cpu_pci_msb_addr); 1941 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1942 hdev->cpu_accessible_dma_address); 1943 free_dma_pool: 1944 dma_pool_destroy(hdev->dma_pool); 1945 free_gaudi_device: 1946 kfree(gaudi); 1947 return rc; 1948 } 1949 1950 static int gaudi_sw_fini(struct hl_device *hdev) 1951 { 1952 struct gaudi_device *gaudi = hdev->asic_specific; 1953 1954 gaudi_free_internal_qmans_pq_mem(hdev); 1955 1956 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1957 1958 if (!hdev->asic_prop.fw_security_enabled) 1959 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1960 hdev->cpu_pci_msb_addr); 1961 1962 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1963 hdev->cpu_accessible_dma_address); 1964 1965 dma_pool_destroy(hdev->dma_pool); 1966 1967 kfree(gaudi); 1968 1969 return 0; 1970 } 1971 1972 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1973 { 1974 struct hl_device *hdev = arg; 1975 int i; 1976 1977 if (hdev->disabled) 1978 return IRQ_HANDLED; 1979 1980 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1981 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1982 1983 hl_irq_handler_eq(irq, &hdev->event_queue); 1984 1985 return IRQ_HANDLED; 1986 } 1987 1988 /* 1989 * For backward compatibility, new MSI interrupts should be set after the 1990 * existing CPU and NIC interrupts. 1991 */ 1992 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1993 bool cpu_eq) 1994 { 1995 int msi_vec; 1996 1997 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1998 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1999 GAUDI_EVENT_QUEUE_MSI_IDX); 2000 2001 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? 
nr : 2002 (nr + NIC_NUMBER_OF_ENGINES + 1); 2003 2004 return pci_irq_vector(hdev->pdev, msi_vec); 2005 } 2006 2007 static int gaudi_enable_msi_single(struct hl_device *hdev) 2008 { 2009 int rc, irq; 2010 2011 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n"); 2012 2013 irq = gaudi_pci_irq_vector(hdev, 0, false); 2014 rc = request_irq(irq, gaudi_irq_handler_single, 0, 2015 "gaudi single msi", hdev); 2016 if (rc) 2017 dev_err(hdev->dev, 2018 "Failed to request single MSI IRQ\n"); 2019 2020 return rc; 2021 } 2022 2023 static int gaudi_enable_msi(struct hl_device *hdev) 2024 { 2025 struct gaudi_device *gaudi = hdev->asic_specific; 2026 int rc; 2027 2028 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 2029 return 0; 2030 2031 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); 2032 if (rc < 0) { 2033 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 2034 return rc; 2035 } 2036 2037 rc = gaudi_enable_msi_single(hdev); 2038 if (rc) 2039 goto free_pci_irq_vectors; 2040 2041 gaudi->hw_cap_initialized |= HW_CAP_MSI; 2042 2043 return 0; 2044 2045 free_pci_irq_vectors: 2046 pci_free_irq_vectors(hdev->pdev); 2047 return rc; 2048 } 2049 2050 static void gaudi_sync_irqs(struct hl_device *hdev) 2051 { 2052 struct gaudi_device *gaudi = hdev->asic_specific; 2053 2054 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2055 return; 2056 2057 /* Wait for all pending IRQs to be finished */ 2058 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 2059 } 2060 2061 static void gaudi_disable_msi(struct hl_device *hdev) 2062 { 2063 struct gaudi_device *gaudi = hdev->asic_specific; 2064 2065 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2066 return; 2067 2068 gaudi_sync_irqs(hdev); 2069 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 2070 pci_free_irq_vectors(hdev->pdev); 2071 2072 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 2073 } 2074 2075 static void gaudi_init_scrambler_sram(struct hl_device *hdev) 2076 { 2077 struct gaudi_device *gaudi = hdev->asic_specific; 2078 2079 if (hdev->asic_prop.fw_security_enabled) 2080 return; 2081 2082 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 2083 CPU_BOOT_DEV_STS0_SRAM_SCR_EN) 2084 return; 2085 2086 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 2087 return; 2088 2089 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2090 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2091 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2092 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2093 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2094 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2095 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2096 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2097 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2098 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2099 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2100 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2101 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2102 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2103 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2104 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2105 2106 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2107 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2108 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2109 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2110 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2111 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2112 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2113 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2114 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2115 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2116 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2117 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2118 
WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2119 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2120 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2121 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2122 2123 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 2124 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2125 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 2126 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2127 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 2128 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2129 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 2130 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2131 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 2132 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2133 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 2134 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2135 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 2136 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2137 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 2138 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2139 2140 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 2141 } 2142 2143 static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 2144 { 2145 struct gaudi_device *gaudi = hdev->asic_specific; 2146 2147 if (hdev->asic_prop.fw_security_enabled) 2148 return; 2149 2150 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2151 CPU_BOOT_DEV_STS0_DRAM_SCR_EN) 2152 return; 2153 2154 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 2155 return; 2156 2157 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 2158 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2159 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 2160 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2161 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 2162 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2163 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 2164 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2165 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 2166 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2167 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 2168 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2169 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 2170 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2171 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 2172 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2173 2174 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 2175 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2176 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 2177 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2178 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 2179 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2180 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 2181 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2182 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 2183 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2184 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 2185 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2186 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 2187 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2188 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 2189 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2190 2191 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 2192 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2193 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 2194 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2195 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 2196 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2197 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 2198 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2199 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 2200 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2201 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 2202 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2203 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 2204 1 << 
DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2205 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 2206 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2207 2208 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 2209 } 2210 2211 static void gaudi_init_e2e(struct hl_device *hdev) 2212 { 2213 if (hdev->asic_prop.fw_security_enabled) 2214 return; 2215 2216 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2217 CPU_BOOT_DEV_STS0_E2E_CRED_EN) 2218 return; 2219 2220 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 2221 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 2222 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 2223 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 2224 2225 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2226 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2227 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2228 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2229 2230 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2231 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2232 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2233 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2234 2235 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2236 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2237 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2238 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2239 2240 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2241 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2242 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2243 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2244 2245 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2246 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2247 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2248 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2249 2250 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2251 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2252 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2253 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2254 2255 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 2256 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 2257 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 2258 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 2259 2260 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 2261 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 2262 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 2263 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 2264 2265 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2266 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2267 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2268 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2269 2270 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2271 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2272 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2273 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2274 2275 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2276 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2277 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2278 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2279 2280 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2281 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2282 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2283 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2284 2285 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2286 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2287 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2288 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2289 2290 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2291 
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2292 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2293 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2294 2295 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 2296 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 2297 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 2298 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 2299 2300 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2301 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2302 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2303 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2304 2305 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2306 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2307 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2308 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2309 2310 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2311 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2312 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2313 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2314 2315 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2316 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2317 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2318 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2319 2320 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2321 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2322 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2323 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2324 2325 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2326 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2327 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2328 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2329 2330 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2331 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2332 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2333 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2334 2335 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2336 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2337 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2338 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2339 2340 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 2341 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2342 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 2343 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2344 2345 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 2346 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2347 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 2348 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2349 2350 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 2351 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2352 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 2353 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2354 2355 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 2356 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2357 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 2358 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2359 2360 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 2361 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2362 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 2363 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2364 2365 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 2366 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2367 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 2368 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2369 2370 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 2371 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2372 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 2373 1 << 
IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2374 2375 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 2376 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2377 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 2378 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2379 2380 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 2381 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2382 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 2383 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2384 2385 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 2386 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2387 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 2388 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2389 2390 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 2391 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2392 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 2393 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2394 2395 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 2396 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2397 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 2398 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2399 2400 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 2401 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2402 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 2403 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2404 2405 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 2406 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2407 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 2408 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2409 2410 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 2411 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2412 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 2413 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2414 2415 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 2416 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2417 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 2418 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2419 2420 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 2421 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2422 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 2423 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2424 2425 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 2426 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2427 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 2428 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2429 2430 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 2431 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2432 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 2433 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2434 2435 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 2436 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2437 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 2438 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2439 2440 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 2441 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2442 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 2443 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2444 2445 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 2446 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2447 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 2448 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2449 2450 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, 2451 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2452 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, 2453 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2454 2455 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, 2456 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2457 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, 2458 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2459 } 2460 2461 static void gaudi_init_hbm_cred(struct hl_device *hdev) 2462 { 2463 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; 2464 2465 if (hdev->asic_prop.fw_security_enabled) 2466 return; 2467 2468 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2469 CPU_BOOT_DEV_STS0_HBM_CRED_EN) 2470 return; 2471 2472 hbm0_wr = 0x33333333; 2473 hbm0_rd = 0x77777777; 2474 
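/* The second HBM interface (HBM1) of each DMA_IF gets its own credit values */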
hbm1_wr = 0x55555555; 2475 hbm1_rd = 0xDDDDDDDD; 2476 2477 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); 2478 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); 2479 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); 2480 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); 2481 2482 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); 2483 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); 2484 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); 2485 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); 2486 2487 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); 2488 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); 2489 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); 2490 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); 2491 2492 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); 2493 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); 2494 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); 2495 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); 2496 2497 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, 2498 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2499 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2500 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, 2501 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2502 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2503 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, 2504 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2505 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2506 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, 2507 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2508 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2509 2510 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, 2511 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2512 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2513 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, 2514 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2515 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2516 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, 2517 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2518 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2519 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, 2520 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2521 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2522 } 2523 2524 static void gaudi_init_golden_registers(struct hl_device *hdev) 2525 { 2526 u32 tpc_offset; 2527 int tpc_id, i; 2528 2529 gaudi_init_e2e(hdev); 2530 gaudi_init_hbm_cred(hdev); 2531 2532 for (tpc_id = 0, tpc_offset = 0; 2533 tpc_id < TPC_NUMBER_OF_ENGINES; 2534 tpc_id++, tpc_offset += TPC_CFG_OFFSET) { 2535 /* Mask all arithmetic interrupts from TPC */ 2536 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); 2537 /* Set 16 cache lines */ 2538 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, 2539 ICACHE_FETCH_LINE_NUM, 2); 2540 } 2541 2542 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ 2543 for (i = 0 ; i < 128 ; i += 8) 2544 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); 2545 2546 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2547 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2548 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2549 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2550 } 2551 2552 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 2553 int qman_id, dma_addr_t qman_pq_addr) 2554 { 2555 struct cpu_dyn_regs *dyn_regs = 2556 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2557 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2558 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2559 u32 q_off, dma_qm_offset; 2560 u32 dma_qm_err_cfg, irq_handler_offset; 2561 2562 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2563 2564 mtr_base_en_lo = 
lower_32_bits(CFG_BASE + 2565 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2566 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2567 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2568 so_base_en_lo = lower_32_bits(CFG_BASE + 2569 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2570 so_base_en_hi = upper_32_bits(CFG_BASE + 2571 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2572 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2573 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2574 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2575 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2576 so_base_ws_lo = lower_32_bits(CFG_BASE + 2577 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2578 so_base_ws_hi = upper_32_bits(CFG_BASE + 2579 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2580 2581 q_off = dma_qm_offset + qman_id * 4; 2582 2583 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 2584 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 2585 2586 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 2587 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2588 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2589 2590 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 2591 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2592 QMAN_LDMA_SRC_OFFSET); 2593 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2594 QMAN_LDMA_DST_OFFSET); 2595 2596 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2597 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2598 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2599 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2600 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 2601 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 2602 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 2603 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 2604 2605 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 2606 2607 /* The following configuration is needed only once per QMAN */ 2608 if (qman_id == 0) { 2609 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2610 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2611 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2612 2613 /* Configure RAZWI IRQ */ 2614 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2615 if (hdev->stop_on_err) 2616 dma_qm_err_cfg |= 2617 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2618 2619 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2620 2621 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2622 lower_32_bits(CFG_BASE + irq_handler_offset)); 2623 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2624 upper_32_bits(CFG_BASE + irq_handler_offset)); 2625 2626 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2627 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2628 dma_id); 2629 2630 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2631 QM_ARB_ERR_MSG_EN_MASK); 2632 2633 /* Set timeout to maximum */ 2634 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2635 2636 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2637 QMAN_EXTERNAL_MAKE_TRUSTED); 2638 2639 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2640 } 2641 } 2642 2643 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) 2644 { 2645 struct cpu_dyn_regs *dyn_regs = 2646 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2647 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; 2648 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 2649 u32 irq_handler_offset; 2650 2651 /* Set to maximum possible according to physical size */ 2652 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); 2653 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); 2654 2655 /* WA for H/W bug H3-2116 */ 2656 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15); 2657 2658 /* The STOP_ON bit means the operation gets no completion in case of RAZWI */ 2659 if (hdev->stop_on_err) 2660 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; 2661 2662 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); 2663 2664 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2665 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2666 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 2667 2668 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, 2669 lower_32_bits(CFG_BASE + irq_handler_offset)); 2670 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, 2671 upper_32_bits(CFG_BASE + irq_handler_offset)); 2672 2673 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, 2674 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); 2675 WREG32(mmDMA0_CORE_PROT + dma_offset, 2676 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); 2677 /* If the channel is secured, it should be in MMU bypass mode */ 2678 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, 2679 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); 2680 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); 2681 } 2682 2683 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, 2684 u32 enable_mask) 2685 { 2686 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2687 2688 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); 2689 } 2690 2691 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) 2692 { 2693 struct gaudi_device *gaudi = hdev->asic_specific; 2694 struct hl_hw_queue *q; 2695 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; 2696 2697 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) 2698 return; 2699 2700 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2701 dma_id = gaudi_dma_assignment[i]; 2702 /* 2703 * For queues after the CPU Q, need to add 1 to get the correct 2704 * queue.
In addition, need to add the CPU EQ and NIC IRQs in 2705 * order to get the correct MSI register. 2706 */ 2707 if (dma_id > 1) { 2708 cpu_skip = 1; 2709 nic_skip = NIC_NUMBER_OF_ENGINES; 2710 } else { 2711 cpu_skip = 0; 2712 nic_skip = 0; 2713 } 2714 2715 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2716 q_idx = 4 * dma_id + j + cpu_skip; 2717 q = &hdev->kernel_queues[q_idx]; 2718 q->cq_id = cq_id++; 2719 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 2720 gaudi_init_pci_dma_qman(hdev, dma_id, j, 2721 q->bus_address); 2722 } 2723 2724 gaudi_init_dma_core(hdev, dma_id); 2725 2726 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 2727 } 2728 2729 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2730 } 2731 2732 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2733 int qman_id, u64 qman_base_addr) 2734 { 2735 struct cpu_dyn_regs *dyn_regs = 2736 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2737 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2738 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2739 u32 dma_qm_err_cfg, irq_handler_offset; 2740 u32 q_off, dma_qm_offset; 2741 2742 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2743 2744 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2745 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2746 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2747 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2748 so_base_en_lo = lower_32_bits(CFG_BASE + 2749 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2750 so_base_en_hi = upper_32_bits(CFG_BASE + 2751 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2752 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2753 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2754 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2755 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2756 so_base_ws_lo = lower_32_bits(CFG_BASE + 2757 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2758 so_base_ws_hi = upper_32_bits(CFG_BASE + 2759 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2760 2761 q_off = dma_qm_offset + qman_id * 4; 2762 2763 if (qman_id < 4) { 2764 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, 2765 lower_32_bits(qman_base_addr)); 2766 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, 2767 upper_32_bits(qman_base_addr)); 2768 2769 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); 2770 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2771 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2772 2773 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2774 QMAN_CPDMA_SIZE_OFFSET); 2775 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2776 QMAN_CPDMA_SRC_OFFSET); 2777 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2778 QMAN_CPDMA_DST_OFFSET); 2779 } else { 2780 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2781 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2782 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2783 2784 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2785 QMAN_LDMA_SIZE_OFFSET); 2786 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2787 QMAN_LDMA_SRC_OFFSET); 2788 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2789 QMAN_LDMA_DST_OFFSET); 2790 2791 /* Configure RAZWI IRQ */ 2792 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2793 if (hdev->stop_on_err) 2794 dma_qm_err_cfg |= 2795 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2796 2797 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2798 2799 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2800 lower_32_bits(CFG_BASE + irq_handler_offset)); 2801 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2802 upper_32_bits(CFG_BASE + irq_handler_offset)); 2803 2804 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2805 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2806 dma_id); 2807 2808 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2809 QM_ARB_ERR_MSG_EN_MASK); 2810 2811 /* Set timeout to maximum */ 2812 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2813 2814 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2815 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2816 QMAN_INTERNAL_MAKE_TRUSTED); 2817 } 2818 2819 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2820 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2821 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2822 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2823 2824 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */ 2825 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) { 2826 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 2827 mtr_base_ws_lo); 2828 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 2829 mtr_base_ws_hi); 2830 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 2831 so_base_ws_lo); 2832 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 2833 so_base_ws_hi); 2834 } 2835 } 2836 2837 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) 2838 { 2839 struct gaudi_device *gaudi = hdev->asic_specific; 2840 struct gaudi_internal_qman_info *q; 2841 u64 qman_base_addr; 2842 int i, j, dma_id, internal_q_index; 2843 2844 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) 2845 return; 2846 2847 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 2848 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; 2849 2850 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2851 /* 2852 * Add the CPU queue in order to get the correct queue 2853 * number as all internal queue are placed after it 2854 */ 2855 internal_q_index = dma_id * QMAN_STREAMS + j + 1; 2856 2857 q = &gaudi->internal_qmans[internal_q_index]; 2858 qman_base_addr = (u64) q->pq_dma_addr; 2859 gaudi_init_hbm_dma_qman(hdev, dma_id, j, 2860 qman_base_addr); 2861 } 2862 2863 /* Initializing lower CP for HBM DMA QMAN */ 2864 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); 2865 2866 gaudi_init_dma_core(hdev, dma_id); 2867 2868 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); 2869 } 2870 2871 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; 2872 } 2873 2874 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, 2875 int qman_id, u64 qman_base_addr) 2876 { 2877 struct cpu_dyn_regs *dyn_regs = 2878 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2879 u32 mtr_base_lo, mtr_base_hi; 2880 u32 so_base_lo, so_base_hi; 2881 u32 irq_handler_offset; 2882 u32 q_off, mme_id; 2883 
u32 mme_qm_err_cfg; 2884 2885 mtr_base_lo = lower_32_bits(CFG_BASE + 2886 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2887 mtr_base_hi = upper_32_bits(CFG_BASE + 2888 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2889 so_base_lo = lower_32_bits(CFG_BASE + 2890 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2891 so_base_hi = upper_32_bits(CFG_BASE + 2892 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2893 2894 q_off = mme_offset + qman_id * 4; 2895 2896 if (qman_id < 4) { 2897 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, 2898 lower_32_bits(qman_base_addr)); 2899 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, 2900 upper_32_bits(qman_base_addr)); 2901 2902 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); 2903 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); 2904 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); 2905 2906 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2907 QMAN_CPDMA_SIZE_OFFSET); 2908 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2909 QMAN_CPDMA_SRC_OFFSET); 2910 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2911 QMAN_CPDMA_DST_OFFSET); 2912 } else { 2913 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2914 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2915 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 2916 2917 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2918 QMAN_LDMA_SIZE_OFFSET); 2919 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2920 QMAN_LDMA_SRC_OFFSET); 2921 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2922 QMAN_LDMA_DST_OFFSET); 2923 2924 /* Configure RAZWI IRQ */ 2925 mme_id = mme_offset / 2926 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2; 2927 2928 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2929 if (hdev->stop_on_err) 2930 mme_qm_err_cfg |= 2931 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2932 2933 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); 2934 2935 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, 2936 lower_32_bits(CFG_BASE + irq_handler_offset)); 2937 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, 2938 upper_32_bits(CFG_BASE + irq_handler_offset)); 2939 2940 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, 2941 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + 2942 mme_id); 2943 2944 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, 2945 QM_ARB_ERR_MSG_EN_MASK); 2946 2947 /* Set timeout to maximum */ 2948 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); 2949 2950 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); 2951 WREG32(mmMME0_QM_GLBL_PROT + mme_offset, 2952 QMAN_INTERNAL_MAKE_TRUSTED); 2953 } 2954 2955 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 2956 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 2957 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 2958 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 2959 } 2960 2961 static void gaudi_init_mme_qmans(struct hl_device *hdev) 2962 { 2963 struct gaudi_device *gaudi = hdev->asic_specific; 2964 struct gaudi_internal_qman_info *q; 2965 u64 qman_base_addr; 2966 u32 mme_offset; 2967 int i, internal_q_index; 2968 2969 if (gaudi->hw_cap_initialized & HW_CAP_MME) 2970 return; 2971 2972 /* 2973 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) 2974 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) 2975 */ 2976 2977 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2978 2979 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { 2980 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; 2981 q = &gaudi->internal_qmans[internal_q_index]; 2982 qman_base_addr = (u64) q->pq_dma_addr; 
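/* Streams 0-3 are placed on the MME2 QMAN block; after stream 3 the offset is reset to 0 so streams 4-7 go to the MME0 QMAN block */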
2983 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), 2984 qman_base_addr); 2985 if (i == 3) 2986 mme_offset = 0; 2987 } 2988 2989 /* Initializing lower CP for MME QMANs */ 2990 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2991 gaudi_init_mme_qman(hdev, mme_offset, 4, 0); 2992 gaudi_init_mme_qman(hdev, 0, 4, 0); 2993 2994 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2995 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2996 2997 gaudi->hw_cap_initialized |= HW_CAP_MME; 2998 } 2999 3000 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, 3001 int qman_id, u64 qman_base_addr) 3002 { 3003 struct cpu_dyn_regs *dyn_regs = 3004 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3005 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3006 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3007 u32 tpc_qm_err_cfg, irq_handler_offset; 3008 u32 q_off, tpc_id; 3009 3010 mtr_base_en_lo = lower_32_bits(CFG_BASE + 3011 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3012 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3013 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3014 so_base_en_lo = lower_32_bits(CFG_BASE + 3015 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3016 so_base_en_hi = upper_32_bits(CFG_BASE + 3017 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3018 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 3019 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3020 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3021 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3022 so_base_ws_lo = lower_32_bits(CFG_BASE + 3023 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3024 so_base_ws_hi = upper_32_bits(CFG_BASE + 3025 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3026 3027 q_off = tpc_offset + qman_id * 4; 3028 3029 tpc_id = tpc_offset / 3030 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); 3031 3032 if (qman_id < 4) { 3033 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, 3034 lower_32_bits(qman_base_addr)); 3035 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, 3036 upper_32_bits(qman_base_addr)); 3037 3038 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); 3039 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); 3040 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); 3041 3042 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3043 QMAN_CPDMA_SIZE_OFFSET); 3044 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3045 QMAN_CPDMA_SRC_OFFSET); 3046 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3047 QMAN_CPDMA_DST_OFFSET); 3048 } else { 3049 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3050 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3051 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 3052 3053 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3054 QMAN_LDMA_SIZE_OFFSET); 3055 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3056 QMAN_LDMA_SRC_OFFSET); 3057 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3058 QMAN_LDMA_DST_OFFSET); 3059 3060 /* Configure RAZWI IRQ */ 3061 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3062 if (hdev->stop_on_err) 3063 tpc_qm_err_cfg |= 3064 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3065 3066 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); 3067 3068 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, 3069 lower_32_bits(CFG_BASE + irq_handler_offset)); 3070 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, 3071 upper_32_bits(CFG_BASE + irq_handler_offset)); 3072 3073 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, 3074 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + 3075 tpc_id); 3076 3077 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, 3078 QM_ARB_ERR_MSG_EN_MASK); 3079 3080 /* Set timeout to maximum */ 3081 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); 3082 3083 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); 3084 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, 3085 QMAN_INTERNAL_MAKE_TRUSTED); 3086 } 3087 3088 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3089 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3090 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3091 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3092 3093 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */ 3094 if (tpc_id == 6) { 3095 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 3096 mtr_base_ws_lo); 3097 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 3098 mtr_base_ws_hi); 3099 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 3100 so_base_ws_lo); 3101 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 3102 so_base_ws_hi); 3103 } 3104 } 3105 3106 static void gaudi_init_tpc_qmans(struct hl_device *hdev) 3107 { 3108 struct gaudi_device *gaudi = hdev->asic_specific; 3109 struct gaudi_internal_qman_info *q; 3110 u64 qman_base_addr; 3111 u32 so_base_hi, tpc_offset = 0; 3112 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - 3113 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; 3114 int i, tpc_id, internal_q_index; 3115 3116 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) 3117 return; 3118 3119 so_base_hi = upper_32_bits(CFG_BASE + 3120 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3121 3122 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3123 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3124 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + 3125 tpc_id * QMAN_STREAMS + i; 3126 q = &gaudi->internal_qmans[internal_q_index]; 3127 qman_base_addr = (u64) q->pq_dma_addr; 3128 gaudi_init_tpc_qman(hdev, tpc_offset, i, 3129 qman_base_addr); 3130 3131 if (i == 3) { 3132 /* Initializing lower CP for TPC QMAN */ 3133 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); 3134 3135 /* Enable the QMAN and TPC channel */ 3136 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 3137 QMAN_TPC_ENABLE); 3138 } 3139 } 3140 3141 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, 3142 so_base_hi); 3143 3144 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3145 3146 gaudi->hw_cap_initialized |= 3147 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id); 3148 } 3149 } 3150 3151 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, 3152 int qman_id, u64 qman_base_addr, int nic_id) 3153 { 3154 
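/*
 * Per-stream NIC QMAN setup: PQ base/size, CP LDMA offsets and the sync
 * manager message base addresses. The QMAN-wide error (RAZWI) reporting,
 * arbitration watchdog and protection bits are configured only once,
 * when qman_id is 0.
 */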
struct cpu_dyn_regs *dyn_regs = 3155 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3156 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3157 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3158 u32 nic_qm_err_cfg, irq_handler_offset; 3159 u32 q_off; 3160 3161 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3162 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3163 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3164 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3165 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3166 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3167 so_base_en_hi = upper_32_bits(CFG_BASE + 3168 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3169 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3170 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3171 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3172 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3173 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3174 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3175 so_base_ws_hi = upper_32_bits(CFG_BASE + 3176 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3177 3178 q_off = nic_offset + qman_id * 4; 3179 3180 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr)); 3181 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr)); 3182 3183 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH)); 3184 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0); 3185 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0); 3186 3187 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3188 QMAN_LDMA_SIZE_OFFSET); 3189 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3190 QMAN_LDMA_SRC_OFFSET); 3191 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3192 QMAN_LDMA_DST_OFFSET); 3193 3194 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3195 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3196 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3197 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3198 3199 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */ 3200 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 3201 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 3202 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 3203 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 3204 3205 if (qman_id == 0) { 3206 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3207 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3208 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 3209 3210 /* Configure RAZWI IRQ */ 3211 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3212 if (hdev->stop_on_err) 3213 nic_qm_err_cfg |= 3214 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3215 3216 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg); 3217 3218 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset, 3219 lower_32_bits(CFG_BASE + irq_handler_offset)); 3220 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset, 3221 upper_32_bits(CFG_BASE + irq_handler_offset)); 3222 3223 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset, 3224 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id + 3225 nic_id); 3226 3227 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, 3228 QM_ARB_ERR_MSG_EN_MASK); 3229 3230 /* Set timeout to maximum */ 3231 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); 3232 3233 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); 3234 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, 3235 QMAN_INTERNAL_MAKE_TRUSTED); 3236 } 3237 } 3238 3239 static void gaudi_init_nic_qmans(struct hl_device *hdev) 3240 { 3241 struct gaudi_device *gaudi = hdev->asic_specific; 3242 struct gaudi_internal_qman_info *q; 3243 u64 qman_base_addr; 3244 u32 nic_offset = 0; 3245 u32 nic_delta_between_qmans = 3246 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3247 u32 nic_delta_between_nics = 3248 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3249 int i, nic_id, internal_q_index; 3250 3251 if (!hdev->nic_ports_mask) 3252 return; 3253 3254 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK) 3255 return; 3256 3257 dev_dbg(hdev->dev, "Initializing NIC QMANs\n"); 3258 3259 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3260 if (!(hdev->nic_ports_mask & (1 << nic_id))) { 3261 nic_offset += nic_delta_between_qmans; 3262 if (nic_id & 1) { 3263 nic_offset -= (nic_delta_between_qmans * 2); 3264 nic_offset += nic_delta_between_nics; 3265 } 3266 continue; 3267 } 3268 3269 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3270 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 + 3271 nic_id * QMAN_STREAMS + i; 3272 q = &gaudi->internal_qmans[internal_q_index]; 3273 qman_base_addr = (u64) q->pq_dma_addr; 3274 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3), 3275 qman_base_addr, nic_id); 3276 } 3277 3278 /* Enable the QMAN */ 3279 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE); 3280 3281 nic_offset += nic_delta_between_qmans; 3282 if (nic_id & 1) { 3283 nic_offset -= (nic_delta_between_qmans * 2); 3284 nic_offset += nic_delta_between_nics; 3285 } 3286 3287 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id); 3288 } 3289 } 3290 3291 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) 3292 { 3293 struct gaudi_device *gaudi = hdev->asic_specific; 3294 3295 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3296 return; 3297 3298 WREG32(mmDMA0_QM_GLBL_CFG0, 0); 3299 WREG32(mmDMA1_QM_GLBL_CFG0, 0); 3300 WREG32(mmDMA5_QM_GLBL_CFG0, 0); 3301 } 3302 3303 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) 3304 { 3305 struct gaudi_device *gaudi = hdev->asic_specific; 3306 3307 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3308 return; 3309 3310 WREG32(mmDMA2_QM_GLBL_CFG0, 0); 3311 WREG32(mmDMA3_QM_GLBL_CFG0, 0); 3312 WREG32(mmDMA4_QM_GLBL_CFG0, 0); 3313 WREG32(mmDMA6_QM_GLBL_CFG0, 0); 3314 WREG32(mmDMA7_QM_GLBL_CFG0, 0); 3315 } 3316 3317 static void gaudi_disable_mme_qmans(struct hl_device *hdev) 3318 { 3319 struct gaudi_device *gaudi = hdev->asic_specific; 3320 3321 if 
(!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3322 return; 3323 3324 WREG32(mmMME2_QM_GLBL_CFG0, 0); 3325 WREG32(mmMME0_QM_GLBL_CFG0, 0); 3326 } 3327 3328 static void gaudi_disable_tpc_qmans(struct hl_device *hdev) 3329 { 3330 struct gaudi_device *gaudi = hdev->asic_specific; 3331 u32 tpc_offset = 0; 3332 int tpc_id; 3333 3334 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3335 return; 3336 3337 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3338 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); 3339 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3340 } 3341 } 3342 3343 static void gaudi_disable_nic_qmans(struct hl_device *hdev) 3344 { 3345 struct gaudi_device *gaudi = hdev->asic_specific; 3346 u32 nic_mask, nic_offset = 0; 3347 u32 nic_delta_between_qmans = 3348 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3349 u32 nic_delta_between_nics = 3350 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3351 int nic_id; 3352 3353 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3354 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id); 3355 3356 if (gaudi->hw_cap_initialized & nic_mask) 3357 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0); 3358 3359 nic_offset += nic_delta_between_qmans; 3360 if (nic_id & 1) { 3361 nic_offset -= (nic_delta_between_qmans * 2); 3362 nic_offset += nic_delta_between_nics; 3363 } 3364 } 3365 } 3366 3367 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) 3368 { 3369 struct gaudi_device *gaudi = hdev->asic_specific; 3370 3371 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3372 return; 3373 3374 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ 3375 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3376 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3377 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3378 } 3379 3380 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) 3381 { 3382 struct gaudi_device *gaudi = hdev->asic_specific; 3383 3384 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3385 return; 3386 3387 /* Stop CPs of HBM DMA QMANs */ 3388 3389 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3390 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3391 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3392 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3393 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3394 } 3395 3396 static void gaudi_stop_mme_qmans(struct hl_device *hdev) 3397 { 3398 struct gaudi_device *gaudi = hdev->asic_specific; 3399 3400 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3401 return; 3402 3403 /* Stop CPs of MME QMANs */ 3404 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3405 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3406 } 3407 3408 static void gaudi_stop_tpc_qmans(struct hl_device *hdev) 3409 { 3410 struct gaudi_device *gaudi = hdev->asic_specific; 3411 3412 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3413 return; 3414 3415 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3416 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3417 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3418 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3419 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3420 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3421 
WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3422 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3423 } 3424 3425 static void gaudi_stop_nic_qmans(struct hl_device *hdev) 3426 { 3427 struct gaudi_device *gaudi = hdev->asic_specific; 3428 3429 /* Stop upper CPs of QMANs */ 3430 3431 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) 3432 WREG32(mmNIC0_QM0_GLBL_CFG1, 3433 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3434 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3435 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3436 3437 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) 3438 WREG32(mmNIC0_QM1_GLBL_CFG1, 3439 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3440 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3441 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3442 3443 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) 3444 WREG32(mmNIC1_QM0_GLBL_CFG1, 3445 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3446 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3447 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3448 3449 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) 3450 WREG32(mmNIC1_QM1_GLBL_CFG1, 3451 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3452 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3453 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3454 3455 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) 3456 WREG32(mmNIC2_QM0_GLBL_CFG1, 3457 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3458 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3459 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3460 3461 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) 3462 WREG32(mmNIC2_QM1_GLBL_CFG1, 3463 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3464 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3465 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3466 3467 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) 3468 WREG32(mmNIC3_QM0_GLBL_CFG1, 3469 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3470 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3471 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3472 3473 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) 3474 WREG32(mmNIC3_QM1_GLBL_CFG1, 3475 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3476 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3477 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3478 3479 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) 3480 WREG32(mmNIC4_QM0_GLBL_CFG1, 3481 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3482 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3483 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3484 3485 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) 3486 WREG32(mmNIC4_QM1_GLBL_CFG1, 3487 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3488 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3489 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3490 } 3491 3492 static void gaudi_pci_dma_stall(struct hl_device *hdev) 3493 { 3494 struct gaudi_device *gaudi = hdev->asic_specific; 3495 3496 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3497 return; 3498 3499 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3500 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3501 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3502 } 3503 3504 static void gaudi_hbm_dma_stall(struct hl_device *hdev) 3505 { 3506 struct gaudi_device *gaudi = hdev->asic_specific; 3507 3508 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3509 return; 3510 3511 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3512 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3513 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3514 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3515 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3516 } 3517 3518 static void gaudi_mme_stall(struct hl_device *hdev) 3519 { 3520 struct gaudi_device *gaudi = hdev->asic_specific; 3521 3522 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3523 return; 3524 
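	/*
	 * The MME QMANs were already stopped earlier in the halt flow by
	 * gaudi_stop_mme_qmans(); here the MME cores themselves are stalled
	 * through their ACC and SBAB stall registers.
	 */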
3525 /* WA for H3-1800 bug: do ACC and SBAB writes twice */ 3526 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3527 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3528 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3529 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3530 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3531 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3532 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3533 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3534 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3535 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3536 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3537 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3538 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3539 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3540 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3541 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3542 } 3543 3544 static void gaudi_tpc_stall(struct hl_device *hdev) 3545 { 3546 struct gaudi_device *gaudi = hdev->asic_specific; 3547 3548 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3549 return; 3550 3551 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3552 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3553 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3554 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3555 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3556 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3557 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3558 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3559 } 3560 3561 static void gaudi_disable_clock_gating(struct hl_device *hdev) 3562 { 3563 u32 qman_offset; 3564 int i; 3565 3566 if (hdev->asic_prop.fw_security_enabled) 3567 return; 3568 3569 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 3570 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); 3571 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); 3572 3573 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); 3574 } 3575 3576 WREG32(mmMME0_QM_CGM_CFG, 0); 3577 WREG32(mmMME0_QM_CGM_CFG1, 0); 3578 WREG32(mmMME2_QM_CGM_CFG, 0); 3579 WREG32(mmMME2_QM_CGM_CFG1, 0); 3580 3581 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 3582 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); 3583 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); 3584 3585 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); 3586 } 3587 } 3588 3589 static void gaudi_enable_timestamp(struct hl_device *hdev) 3590 { 3591 /* Disable the timestamp counter */ 3592 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3593 3594 /* Zero the lower/upper parts of the 64-bit counter */ 3595 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); 3596 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); 3597 3598 /* Enable the counter */ 3599 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); 3600 } 3601 3602 static void gaudi_disable_timestamp(struct hl_device *hdev) 3603 { 3604 /* Disable the timestamp counter */ 3605 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3606 } 3607 3608 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3609 { 3610 u32 wait_timeout_ms; 3611 3612 if (hdev->pldm) 3613 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 3614 else 3615 
wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; 3616 3617 if (fw_reset) 3618 goto skip_engines; 3619 3620 gaudi_stop_nic_qmans(hdev); 3621 gaudi_stop_mme_qmans(hdev); 3622 gaudi_stop_tpc_qmans(hdev); 3623 gaudi_stop_hbm_dma_qmans(hdev); 3624 gaudi_stop_pci_dma_qmans(hdev); 3625 3626 msleep(wait_timeout_ms); 3627 3628 gaudi_pci_dma_stall(hdev); 3629 gaudi_hbm_dma_stall(hdev); 3630 gaudi_tpc_stall(hdev); 3631 gaudi_mme_stall(hdev); 3632 3633 msleep(wait_timeout_ms); 3634 3635 gaudi_disable_nic_qmans(hdev); 3636 gaudi_disable_mme_qmans(hdev); 3637 gaudi_disable_tpc_qmans(hdev); 3638 gaudi_disable_hbm_dma_qmans(hdev); 3639 gaudi_disable_pci_dma_qmans(hdev); 3640 3641 gaudi_disable_timestamp(hdev); 3642 3643 skip_engines: 3644 gaudi_disable_msi(hdev); 3645 } 3646 3647 static int gaudi_mmu_init(struct hl_device *hdev) 3648 { 3649 struct asic_fixed_properties *prop = &hdev->asic_prop; 3650 struct gaudi_device *gaudi = hdev->asic_specific; 3651 u64 hop0_addr; 3652 int rc, i; 3653 3654 if (!hdev->mmu_enable) 3655 return 0; 3656 3657 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 3658 return 0; 3659 3660 for (i = 0 ; i < prop->max_asid ; i++) { 3661 hop0_addr = prop->mmu_pgt_addr + 3662 (i * prop->mmu_hop_table_size); 3663 3664 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); 3665 if (rc) { 3666 dev_err(hdev->dev, 3667 "failed to set hop0 addr for asid %d\n", i); 3668 return rc; 3669 } 3670 } 3671 3672 /* init MMU cache manage page */ 3673 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8); 3674 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40); 3675 3676 /* mem cache invalidation */ 3677 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); 3678 3679 rc = hl_mmu_invalidate_cache(hdev, true, 0); 3680 if (rc) 3681 return rc; 3682 3683 WREG32(mmMMU_UP_MMU_ENABLE, 1); 3684 WREG32(mmMMU_UP_SPI_MASK, 0xF); 3685 3686 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440); 3687 3688 /* 3689 * The H/W expects the first PI after init to be 1. After wraparound 3690 * we'll write 0. 
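	 * The driver keeps this producer index in gaudi->mmu_cache_inv_pi and
	 * writes it to the STLB, post-incrementing it, on every MMU
	 * cache-invalidation request.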
	 */
	gaudi->mmu_cache_inv_pi = 1;

	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;
}

static int gaudi_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
}

static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
}

static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
{
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	dynamic_loader = &hdev->fw_loader.dynamic_loader;

	/*
	 * Set initial values for a few specific dynamic registers. Before the
	 * first descriptor is read from the FW, these values have to be
	 * hard-coded. In later stages of the protocol they are updated
	 * automatically by reading the FW descriptor, so the data there is
	 * always up-to-date.
	 */
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu =
				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host =
				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);

	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
}

static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
{
	struct static_fw_load_mgr *static_loader;

	static_loader = &hdev->fw_loader.static_loader;

	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
	static_loader->cpu_reset_wait_msec = hdev->pldm ?
			GAUDI_PLDM_RESET_WAIT_MSEC :
			GAUDI_CPU_RESET_WAIT_MSEC;
}

static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
{
	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;

	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
}

static void gaudi_init_firmware_loader(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;

	/* fill common fields */
	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
	fw_loader->skip_bmc = !hdev->bmc_enable;
	fw_loader->sram_bar_id = SRAM_BAR_ID;
	fw_loader->dram_bar_id = HBM_BAR_ID;

	if (prop->dynamic_fw_load)
		gaudi_init_dynamic_firmware_loader(hdev);
	else
		gaudi_init_static_firmware_loader(hdev);
}

static int gaudi_init_cpu(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * The device CPU works with 40-bit addresses.
	 * This register sets the extension to 50 bits.
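	 * The extension value is taken from hdev->cpu_pci_msb_addr (derived
	 * from the CPU-accessible host memory address) and is written by the
	 * driver only when firmware security is disabled.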
3810 */ 3811 if (!hdev->asic_prop.fw_security_enabled) 3812 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 3813 3814 rc = hl_fw_init_cpu(hdev); 3815 3816 if (rc) 3817 return rc; 3818 3819 gaudi->hw_cap_initialized |= HW_CAP_CPU; 3820 3821 return 0; 3822 } 3823 3824 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3825 { 3826 struct cpu_dyn_regs *dyn_regs = 3827 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3828 struct asic_fixed_properties *prop = &hdev->asic_prop; 3829 struct gaudi_device *gaudi = hdev->asic_specific; 3830 u32 status, irq_handler_offset; 3831 struct hl_eq *eq; 3832 struct hl_hw_queue *cpu_pq = 3833 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 3834 int err; 3835 3836 if (!hdev->cpu_queues_enable) 3837 return 0; 3838 3839 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 3840 return 0; 3841 3842 eq = &hdev->event_queue; 3843 3844 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 3845 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 3846 3847 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 3848 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 3849 3850 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 3851 lower_32_bits(hdev->cpu_accessible_dma_address)); 3852 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 3853 upper_32_bits(hdev->cpu_accessible_dma_address)); 3854 3855 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 3856 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 3857 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 3858 3859 /* Used for EQ CI */ 3860 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 3861 3862 WREG32(mmCPU_IF_PF_PQ_PI, 0); 3863 3864 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 3865 3866 irq_handler_offset = prop->gic_interrupts_enable ? 3867 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3868 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 3869 3870 WREG32(irq_handler_offset, 3871 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 3872 3873 err = hl_poll_timeout( 3874 hdev, 3875 mmCPU_IF_QUEUE_INIT, 3876 status, 3877 (status == PQ_INIT_STATUS_READY_FOR_HOST), 3878 1000, 3879 cpu_timeout); 3880 3881 if (err) { 3882 dev_err(hdev->dev, 3883 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 3884 return -EIO; 3885 } 3886 3887 /* update FW application security bits */ 3888 if (prop->fw_cpu_boot_dev_sts0_valid) 3889 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 3890 if (prop->fw_cpu_boot_dev_sts1_valid) 3891 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 3892 3893 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 3894 return 0; 3895 } 3896 3897 static void gaudi_pre_hw_init(struct hl_device *hdev) 3898 { 3899 /* Perform read from the device to make sure device is up */ 3900 RREG32(mmHW_STATE); 3901 3902 if (!hdev->asic_prop.fw_security_enabled) { 3903 /* Set the access through PCI bars (Linux driver only) as 3904 * secured 3905 */ 3906 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 3907 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 3908 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 3909 3910 /* Perform read to flush the waiting writes to ensure 3911 * configuration was set in the device 3912 */ 3913 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 3914 } 3915 3916 /* 3917 * Let's mark in the H/W that we have reached this point. We check 3918 * this value in the reset_before_init function to understand whether 3919 * we need to reset the chip before doing H/W init. 
This register is 3920 * cleared by the H/W upon H/W reset 3921 */ 3922 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 3923 } 3924 3925 static int gaudi_hw_init(struct hl_device *hdev) 3926 { 3927 struct gaudi_device *gaudi = hdev->asic_specific; 3928 int rc; 3929 3930 gaudi_pre_hw_init(hdev); 3931 3932 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 3933 * So we set it here and if anyone tries to move it later to 3934 * a different address, there will be an error 3935 */ 3936 if (hdev->asic_prop.iatu_done_by_fw) 3937 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE; 3938 3939 /* 3940 * Before pushing u-boot/linux to device, need to set the hbm bar to 3941 * base address of dram 3942 */ 3943 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 3944 dev_err(hdev->dev, 3945 "failed to map HBM bar to DRAM base address\n"); 3946 return -EIO; 3947 } 3948 3949 rc = gaudi_init_cpu(hdev); 3950 if (rc) { 3951 dev_err(hdev->dev, "failed to initialize CPU\n"); 3952 return rc; 3953 } 3954 3955 /* In case the clock gating was enabled in preboot we need to disable 3956 * it here before touching the MME/TPC registers. 3957 */ 3958 gaudi_disable_clock_gating(hdev); 3959 3960 /* SRAM scrambler must be initialized after CPU is running from HBM */ 3961 gaudi_init_scrambler_sram(hdev); 3962 3963 /* This is here just in case we are working without CPU */ 3964 gaudi_init_scrambler_hbm(hdev); 3965 3966 gaudi_init_golden_registers(hdev); 3967 3968 rc = gaudi_mmu_init(hdev); 3969 if (rc) 3970 return rc; 3971 3972 gaudi_init_security(hdev); 3973 3974 gaudi_init_pci_dma_qmans(hdev); 3975 3976 gaudi_init_hbm_dma_qmans(hdev); 3977 3978 gaudi_init_mme_qmans(hdev); 3979 3980 gaudi_init_tpc_qmans(hdev); 3981 3982 gaudi_init_nic_qmans(hdev); 3983 3984 gaudi_enable_timestamp(hdev); 3985 3986 /* MSI must be enabled before CPU queues and NIC are initialized */ 3987 rc = gaudi_enable_msi(hdev); 3988 if (rc) 3989 goto disable_queues; 3990 3991 /* must be called after MSI was enabled */ 3992 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 3993 if (rc) { 3994 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 3995 rc); 3996 goto disable_msi; 3997 } 3998 3999 /* Perform read from the device to flush all configuration */ 4000 RREG32(mmHW_STATE); 4001 4002 return 0; 4003 4004 disable_msi: 4005 gaudi_disable_msi(hdev); 4006 disable_queues: 4007 gaudi_disable_mme_qmans(hdev); 4008 gaudi_disable_pci_dma_qmans(hdev); 4009 4010 return rc; 4011 } 4012 4013 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4014 { 4015 struct cpu_dyn_regs *dyn_regs = 4016 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4017 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset; 4018 struct gaudi_device *gaudi = hdev->asic_specific; 4019 bool driver_performs_reset; 4020 4021 if (!hard_reset) { 4022 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 4023 return 0; 4024 } 4025 4026 if (hdev->pldm) { 4027 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 4028 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 4029 } else { 4030 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; 4031 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 4032 } 4033 4034 if (fw_reset) { 4035 dev_dbg(hdev->dev, 4036 "Firmware performs HARD reset, going to wait %dms\n", 4037 reset_timeout_ms); 4038 4039 goto skip_reset; 4040 } 4041 4042 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && 4043 !hdev->asic_prop.hard_reset_done_by_fw); 4044 4045 /* Set device to handle FLR by H/W as we 
will put the device CPU to 4046 * halt mode 4047 */ 4048 if (driver_performs_reset) 4049 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 4050 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 4051 4052 /* If linux is loaded in the device CPU we need to communicate with it 4053 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU 4054 * registers in case of old F/Ws 4055 */ 4056 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { 4057 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4058 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4059 le32_to_cpu(dyn_regs->gic_host_halt_irq); 4060 4061 WREG32(irq_handler_offset, 4062 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); 4063 4064 /* This is a hail-mary attempt to revive the card in the small chance that the 4065 * f/w has experienced a watchdog event, which caused it to return back to preboot. 4066 * In that case, triggering reset through GIC won't help. We need to trigger the 4067 * reset as if Linux wasn't loaded. 4068 * 4069 * We do it only if the reset cause was HB, because that would be the indication 4070 * of such an event. 4071 * 4072 * In case watchdog hasn't expired but we still got HB, then this won't do any 4073 * damage. 4074 */ 4075 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { 4076 if (hdev->asic_prop.hard_reset_done_by_fw) 4077 hl_fw_ask_hard_reset_without_linux(hdev); 4078 else 4079 hl_fw_ask_halt_machine_without_linux(hdev); 4080 } 4081 } else { 4082 if (hdev->asic_prop.hard_reset_done_by_fw) 4083 hl_fw_ask_hard_reset_without_linux(hdev); 4084 else 4085 hl_fw_ask_halt_machine_without_linux(hdev); 4086 } 4087 4088 if (driver_performs_reset) { 4089 4090 /* Configure the reset registers. Must be done as early as 4091 * possible in case we fail during H/W initialization 4092 */ 4093 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 4094 (CFG_RST_H_DMA_MASK | 4095 CFG_RST_H_MME_MASK | 4096 CFG_RST_H_SM_MASK | 4097 CFG_RST_H_TPC_7_MASK)); 4098 4099 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 4100 4101 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 4102 (CFG_RST_H_HBM_MASK | 4103 CFG_RST_H_TPC_7_MASK | 4104 CFG_RST_H_NIC_MASK | 4105 CFG_RST_H_SM_MASK | 4106 CFG_RST_H_DMA_MASK | 4107 CFG_RST_H_MME_MASK | 4108 CFG_RST_H_CPU_MASK | 4109 CFG_RST_H_MMU_MASK)); 4110 4111 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 4112 (CFG_RST_L_IF_MASK | 4113 CFG_RST_L_PSOC_MASK | 4114 CFG_RST_L_TPC_MASK)); 4115 4116 msleep(cpu_timeout_ms); 4117 4118 /* Tell ASIC not to re-initialize PCIe */ 4119 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 4120 4121 /* Restart BTL/BLR upon hard-reset */ 4122 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 4123 4124 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 4125 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 4126 4127 dev_dbg(hdev->dev, 4128 "Issued HARD reset command, going to wait %dms\n", 4129 reset_timeout_ms); 4130 } else { 4131 dev_dbg(hdev->dev, 4132 "Firmware performs HARD reset, going to wait %dms\n", 4133 reset_timeout_ms); 4134 } 4135 4136 skip_reset: 4137 /* 4138 * After hard reset, we can't poll the BTM_FSM register because the PSOC 4139 * itself is in reset. 
Need to wait until the reset is deasserted 4140 */ 4141 msleep(reset_timeout_ms); 4142 4143 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 4144 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) { 4145 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status); 4146 return -ETIMEDOUT; 4147 } 4148 4149 if (gaudi) { 4150 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | 4151 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | 4152 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | 4153 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | 4154 HW_CAP_HBM_SCRAMBLER); 4155 4156 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 4157 4158 hdev->device_cpu_is_halted = false; 4159 } 4160 return 0; 4161 } 4162 4163 static int gaudi_suspend(struct hl_device *hdev) 4164 { 4165 int rc; 4166 4167 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 4168 if (rc) 4169 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 4170 4171 return rc; 4172 } 4173 4174 static int gaudi_resume(struct hl_device *hdev) 4175 { 4176 return gaudi_init_iatu(hdev); 4177 } 4178 4179 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 4180 void *cpu_addr, dma_addr_t dma_addr, size_t size) 4181 { 4182 int rc; 4183 4184 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4185 VM_DONTCOPY | VM_NORESERVE); 4186 4187 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4188 (dma_addr - HOST_PHYS_BASE), size); 4189 if (rc) 4190 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4191 4192 return rc; 4193 } 4194 4195 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 4196 { 4197 struct cpu_dyn_regs *dyn_regs = 4198 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4199 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset; 4200 struct gaudi_device *gaudi = hdev->asic_specific; 4201 bool invalid_queue = false; 4202 int dma_id; 4203 4204 switch (hw_queue_id) { 4205 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 4206 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 4207 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4208 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4209 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4210 break; 4211 4212 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 4213 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 4214 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4215 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4216 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4217 break; 4218 4219 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 4220 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 4221 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4222 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4223 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4224 break; 4225 4226 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 4227 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 4228 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4229 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4230 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4231 break; 4232 4233 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 4234 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 4235 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4236 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4237 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4238 break; 4239 4240 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 4241 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 4242 dma_qm_offset = dma_id * 
DMA_QMAN_OFFSET; 4243 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4244 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4245 break; 4246 4247 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 4248 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 4249 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4250 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4251 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4252 break; 4253 4254 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 4255 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6]; 4256 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4257 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4258 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4259 break; 4260 4261 case GAUDI_QUEUE_ID_CPU_PQ: 4262 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4263 db_reg_offset = mmCPU_IF_PF_PQ_PI; 4264 else 4265 invalid_queue = true; 4266 break; 4267 4268 case GAUDI_QUEUE_ID_MME_0_0: 4269 db_reg_offset = mmMME2_QM_PQ_PI_0; 4270 break; 4271 4272 case GAUDI_QUEUE_ID_MME_0_1: 4273 db_reg_offset = mmMME2_QM_PQ_PI_1; 4274 break; 4275 4276 case GAUDI_QUEUE_ID_MME_0_2: 4277 db_reg_offset = mmMME2_QM_PQ_PI_2; 4278 break; 4279 4280 case GAUDI_QUEUE_ID_MME_0_3: 4281 db_reg_offset = mmMME2_QM_PQ_PI_3; 4282 break; 4283 4284 case GAUDI_QUEUE_ID_MME_1_0: 4285 db_reg_offset = mmMME0_QM_PQ_PI_0; 4286 break; 4287 4288 case GAUDI_QUEUE_ID_MME_1_1: 4289 db_reg_offset = mmMME0_QM_PQ_PI_1; 4290 break; 4291 4292 case GAUDI_QUEUE_ID_MME_1_2: 4293 db_reg_offset = mmMME0_QM_PQ_PI_2; 4294 break; 4295 4296 case GAUDI_QUEUE_ID_MME_1_3: 4297 db_reg_offset = mmMME0_QM_PQ_PI_3; 4298 break; 4299 4300 case GAUDI_QUEUE_ID_TPC_0_0: 4301 db_reg_offset = mmTPC0_QM_PQ_PI_0; 4302 break; 4303 4304 case GAUDI_QUEUE_ID_TPC_0_1: 4305 db_reg_offset = mmTPC0_QM_PQ_PI_1; 4306 break; 4307 4308 case GAUDI_QUEUE_ID_TPC_0_2: 4309 db_reg_offset = mmTPC0_QM_PQ_PI_2; 4310 break; 4311 4312 case GAUDI_QUEUE_ID_TPC_0_3: 4313 db_reg_offset = mmTPC0_QM_PQ_PI_3; 4314 break; 4315 4316 case GAUDI_QUEUE_ID_TPC_1_0: 4317 db_reg_offset = mmTPC1_QM_PQ_PI_0; 4318 break; 4319 4320 case GAUDI_QUEUE_ID_TPC_1_1: 4321 db_reg_offset = mmTPC1_QM_PQ_PI_1; 4322 break; 4323 4324 case GAUDI_QUEUE_ID_TPC_1_2: 4325 db_reg_offset = mmTPC1_QM_PQ_PI_2; 4326 break; 4327 4328 case GAUDI_QUEUE_ID_TPC_1_3: 4329 db_reg_offset = mmTPC1_QM_PQ_PI_3; 4330 break; 4331 4332 case GAUDI_QUEUE_ID_TPC_2_0: 4333 db_reg_offset = mmTPC2_QM_PQ_PI_0; 4334 break; 4335 4336 case GAUDI_QUEUE_ID_TPC_2_1: 4337 db_reg_offset = mmTPC2_QM_PQ_PI_1; 4338 break; 4339 4340 case GAUDI_QUEUE_ID_TPC_2_2: 4341 db_reg_offset = mmTPC2_QM_PQ_PI_2; 4342 break; 4343 4344 case GAUDI_QUEUE_ID_TPC_2_3: 4345 db_reg_offset = mmTPC2_QM_PQ_PI_3; 4346 break; 4347 4348 case GAUDI_QUEUE_ID_TPC_3_0: 4349 db_reg_offset = mmTPC3_QM_PQ_PI_0; 4350 break; 4351 4352 case GAUDI_QUEUE_ID_TPC_3_1: 4353 db_reg_offset = mmTPC3_QM_PQ_PI_1; 4354 break; 4355 4356 case GAUDI_QUEUE_ID_TPC_3_2: 4357 db_reg_offset = mmTPC3_QM_PQ_PI_2; 4358 break; 4359 4360 case GAUDI_QUEUE_ID_TPC_3_3: 4361 db_reg_offset = mmTPC3_QM_PQ_PI_3; 4362 break; 4363 4364 case GAUDI_QUEUE_ID_TPC_4_0: 4365 db_reg_offset = mmTPC4_QM_PQ_PI_0; 4366 break; 4367 4368 case GAUDI_QUEUE_ID_TPC_4_1: 4369 db_reg_offset = mmTPC4_QM_PQ_PI_1; 4370 break; 4371 4372 case GAUDI_QUEUE_ID_TPC_4_2: 4373 db_reg_offset = mmTPC4_QM_PQ_PI_2; 4374 break; 4375 4376 case GAUDI_QUEUE_ID_TPC_4_3: 4377 db_reg_offset = mmTPC4_QM_PQ_PI_3; 4378 break; 4379 4380 case GAUDI_QUEUE_ID_TPC_5_0: 4381 db_reg_offset = mmTPC5_QM_PQ_PI_0; 4382 break; 4383 4384 case 
GAUDI_QUEUE_ID_TPC_5_1: 4385 db_reg_offset = mmTPC5_QM_PQ_PI_1; 4386 break; 4387 4388 case GAUDI_QUEUE_ID_TPC_5_2: 4389 db_reg_offset = mmTPC5_QM_PQ_PI_2; 4390 break; 4391 4392 case GAUDI_QUEUE_ID_TPC_5_3: 4393 db_reg_offset = mmTPC5_QM_PQ_PI_3; 4394 break; 4395 4396 case GAUDI_QUEUE_ID_TPC_6_0: 4397 db_reg_offset = mmTPC6_QM_PQ_PI_0; 4398 break; 4399 4400 case GAUDI_QUEUE_ID_TPC_6_1: 4401 db_reg_offset = mmTPC6_QM_PQ_PI_1; 4402 break; 4403 4404 case GAUDI_QUEUE_ID_TPC_6_2: 4405 db_reg_offset = mmTPC6_QM_PQ_PI_2; 4406 break; 4407 4408 case GAUDI_QUEUE_ID_TPC_6_3: 4409 db_reg_offset = mmTPC6_QM_PQ_PI_3; 4410 break; 4411 4412 case GAUDI_QUEUE_ID_TPC_7_0: 4413 db_reg_offset = mmTPC7_QM_PQ_PI_0; 4414 break; 4415 4416 case GAUDI_QUEUE_ID_TPC_7_1: 4417 db_reg_offset = mmTPC7_QM_PQ_PI_1; 4418 break; 4419 4420 case GAUDI_QUEUE_ID_TPC_7_2: 4421 db_reg_offset = mmTPC7_QM_PQ_PI_2; 4422 break; 4423 4424 case GAUDI_QUEUE_ID_TPC_7_3: 4425 db_reg_offset = mmTPC7_QM_PQ_PI_3; 4426 break; 4427 4428 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3: 4429 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0)) 4430 invalid_queue = true; 4431 4432 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4433 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off; 4434 break; 4435 4436 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3: 4437 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1)) 4438 invalid_queue = true; 4439 4440 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4441 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off; 4442 break; 4443 4444 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3: 4445 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2)) 4446 invalid_queue = true; 4447 4448 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4449 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off; 4450 break; 4451 4452 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3: 4453 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3)) 4454 invalid_queue = true; 4455 4456 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4457 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off; 4458 break; 4459 4460 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3: 4461 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4)) 4462 invalid_queue = true; 4463 4464 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4465 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off; 4466 break; 4467 4468 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3: 4469 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5)) 4470 invalid_queue = true; 4471 4472 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4473 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off; 4474 break; 4475 4476 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3: 4477 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6)) 4478 invalid_queue = true; 4479 4480 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4481 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off; 4482 break; 4483 4484 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3: 4485 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7)) 4486 invalid_queue = true; 4487 4488 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4489 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off; 4490 break; 4491 4492 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3: 4493 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8)) 4494 invalid_queue = true; 4495 4496 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4497 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off; 4498 break; 4499 4500 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3: 4501 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9)) 4502 invalid_queue = true; 4503 4504 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4505 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off; 4506 break; 4507 4508 default: 4509 
invalid_queue = true; 4510 } 4511 4512 if (invalid_queue) { 4513 /* Should never get here */ 4514 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", 4515 hw_queue_id); 4516 return; 4517 } 4518 4519 db_value = pi; 4520 4521 /* ring the doorbell */ 4522 WREG32(db_reg_offset, db_value); 4523 4524 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { 4525 /* make sure device CPU will read latest data from host */ 4526 mb(); 4527 4528 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4529 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4530 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 4531 4532 WREG32(irq_handler_offset, 4533 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 4534 } 4535 } 4536 4537 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, 4538 struct hl_bd *bd) 4539 { 4540 __le64 *pbd = (__le64 *) bd; 4541 4542 /* The QMANs are on the host memory so a simple copy suffice */ 4543 pqe[0] = pbd[0]; 4544 pqe[1] = pbd[1]; 4545 } 4546 4547 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, 4548 dma_addr_t *dma_handle, gfp_t flags) 4549 { 4550 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, 4551 dma_handle, flags); 4552 4553 /* Shift to the device's base physical address of host memory */ 4554 if (kernel_addr) 4555 *dma_handle += HOST_PHYS_BASE; 4556 4557 return kernel_addr; 4558 } 4559 4560 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, 4561 void *cpu_addr, dma_addr_t dma_handle) 4562 { 4563 /* Cancel the device's base physical address of host memory */ 4564 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; 4565 4566 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); 4567 } 4568 4569 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) 4570 { 4571 struct asic_fixed_properties *prop = &hdev->asic_prop; 4572 u64 cur_addr = prop->dram_user_base_address; 4573 u32 chunk_size, busy; 4574 int rc, dma_id; 4575 4576 while (cur_addr < prop->dram_end_address) { 4577 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4578 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4579 4580 chunk_size = 4581 min((u64)SZ_2G, prop->dram_end_address - cur_addr); 4582 4583 dev_dbg(hdev->dev, 4584 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", 4585 cur_addr, cur_addr + chunk_size); 4586 4587 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 4588 lower_32_bits(val)); 4589 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 4590 upper_32_bits(val)); 4591 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, 4592 lower_32_bits(cur_addr)); 4593 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, 4594 upper_32_bits(cur_addr)); 4595 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, 4596 chunk_size); 4597 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 4598 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) | 4599 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT))); 4600 4601 cur_addr += chunk_size; 4602 4603 if (cur_addr == prop->dram_end_address) 4604 break; 4605 } 4606 4607 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4608 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4609 4610 rc = hl_poll_timeout( 4611 hdev, 4612 mmDMA0_CORE_STS0 + dma_offset, 4613 busy, 4614 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 4615 1000, 4616 HBM_SCRUBBING_TIMEOUT_US); 4617 4618 if (rc) { 4619 dev_err(hdev->dev, 4620 "DMA Timeout during HBM scrubbing of DMA #%d\n", 4621 dma_id); 4622 return -EIO; 4623 } 4624 } 4625 } 4626 4627 return 0; 4628 } 4629 4630 static int gaudi_scrub_device_mem(struct hl_device *hdev) 4631 { 4632 struct asic_fixed_properties *prop = 
&hdev->asic_prop; 4633 u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US : 4634 min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US); 4635 u64 addr, size, val = hdev->memory_scrub_val; 4636 ktime_t timeout; 4637 int rc = 0; 4638 4639 if (!hdev->memory_scrub) 4640 return 0; 4641 4642 timeout = ktime_add_us(ktime_get(), wait_to_idle_time); 4643 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 4644 if (ktime_compare(ktime_get(), timeout) > 0) { 4645 dev_err(hdev->dev, "waiting for idle timeout\n"); 4646 return -ETIMEDOUT; 4647 } 4648 usleep_range((1000 >> 2) + 1, 1000); 4649 } 4650 4651 /* Scrub SRAM */ 4652 addr = prop->sram_user_base_address; 4653 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; 4654 4655 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4656 addr, addr + size, val); 4657 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4658 if (rc) { 4659 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc); 4660 return rc; 4661 } 4662 4663 /* Scrub HBM using all DMA channels in parallel */ 4664 rc = gaudi_scrub_device_dram(hdev, val); 4665 if (rc) { 4666 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc); 4667 return rc; 4668 } 4669 4670 return 0; 4671 } 4672 4673 static void *gaudi_get_int_queue_base(struct hl_device *hdev, 4674 u32 queue_id, dma_addr_t *dma_handle, 4675 u16 *queue_len) 4676 { 4677 struct gaudi_device *gaudi = hdev->asic_specific; 4678 struct gaudi_internal_qman_info *q; 4679 4680 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 4681 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 4682 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 4683 return NULL; 4684 } 4685 4686 q = &gaudi->internal_qmans[queue_id]; 4687 *dma_handle = q->pq_dma_addr; 4688 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; 4689 4690 return q->pq_kernel_addr; 4691 } 4692 4693 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 4694 u16 len, u32 timeout, u64 *result) 4695 { 4696 struct gaudi_device *gaudi = hdev->asic_specific; 4697 4698 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 4699 if (result) 4700 *result = 0; 4701 return 0; 4702 } 4703 4704 if (!timeout) 4705 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 4706 4707 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 4708 timeout, result); 4709 } 4710 4711 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 4712 { 4713 struct packet_msg_prot *fence_pkt; 4714 dma_addr_t pkt_dma_addr; 4715 u32 fence_val, tmp, timeout_usec; 4716 dma_addr_t fence_dma_addr; 4717 u32 *fence_ptr; 4718 int rc; 4719 4720 if (hdev->pldm) 4721 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 4722 else 4723 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 4724 4725 fence_val = GAUDI_QMAN0_FENCE_VAL; 4726 4727 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 4728 if (!fence_ptr) { 4729 dev_err(hdev->dev, 4730 "Failed to allocate memory for H/W queue %d testing\n", 4731 hw_queue_id); 4732 return -ENOMEM; 4733 } 4734 4735 *fence_ptr = 0; 4736 4737 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL, 4738 &pkt_dma_addr); 4739 if (!fence_pkt) { 4740 dev_err(hdev->dev, 4741 "Failed to allocate packet for H/W queue %d testing\n", 4742 hw_queue_id); 4743 rc = -ENOMEM; 4744 goto free_fence_ptr; 4745 } 4746 4747 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4748 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4749 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4750 4751 fence_pkt->ctl 
= cpu_to_le32(tmp); 4752 fence_pkt->value = cpu_to_le32(fence_val); 4753 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4754 4755 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 4756 sizeof(struct packet_msg_prot), 4757 pkt_dma_addr); 4758 if (rc) { 4759 dev_err(hdev->dev, 4760 "Failed to send fence packet to H/W queue %d\n", 4761 hw_queue_id); 4762 goto free_pkt; 4763 } 4764 4765 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 4766 1000, timeout_usec, true); 4767 4768 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 4769 4770 if (rc == -ETIMEDOUT) { 4771 dev_err(hdev->dev, 4772 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 4773 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 4774 rc = -EIO; 4775 } 4776 4777 free_pkt: 4778 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr); 4779 free_fence_ptr: 4780 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 4781 return rc; 4782 } 4783 4784 static int gaudi_test_cpu_queue(struct hl_device *hdev) 4785 { 4786 struct gaudi_device *gaudi = hdev->asic_specific; 4787 4788 /* 4789 * check capability here as send_cpu_message() won't update the result 4790 * value if no capability 4791 */ 4792 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 4793 return 0; 4794 4795 return hl_fw_test_cpu_queue(hdev); 4796 } 4797 4798 static int gaudi_test_queues(struct hl_device *hdev) 4799 { 4800 int i, rc, ret_val = 0; 4801 4802 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 4803 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 4804 rc = gaudi_test_queue(hdev, i); 4805 if (rc) 4806 ret_val = -EINVAL; 4807 } 4808 } 4809 4810 rc = gaudi_test_cpu_queue(hdev); 4811 if (rc) 4812 ret_val = -EINVAL; 4813 4814 return ret_val; 4815 } 4816 4817 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 4818 gfp_t mem_flags, dma_addr_t *dma_handle) 4819 { 4820 void *kernel_addr; 4821 4822 if (size > GAUDI_DMA_POOL_BLK_SIZE) 4823 return NULL; 4824 4825 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 4826 4827 /* Shift to the device's base physical address of host memory */ 4828 if (kernel_addr) 4829 *dma_handle += HOST_PHYS_BASE; 4830 4831 return kernel_addr; 4832 } 4833 4834 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 4835 dma_addr_t dma_addr) 4836 { 4837 /* Cancel the device's base physical address of host memory */ 4838 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 4839 4840 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 4841 } 4842 4843 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 4844 size_t size, dma_addr_t *dma_handle) 4845 { 4846 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 4847 } 4848 4849 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 4850 size_t size, void *vaddr) 4851 { 4852 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 4853 } 4854 4855 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) 4856 { 4857 struct scatterlist *sg, *sg_next_iter; 4858 u32 count, dma_desc_cnt; 4859 u64 len, len_next; 4860 dma_addr_t addr, addr_next; 4861 4862 dma_desc_cnt = 0; 4863 4864 for_each_sgtable_dma_sg(sgt, sg, count) { 4865 len = sg_dma_len(sg); 4866 addr = sg_dma_address(sg); 4867 4868 if (len == 0) 4869 break; 4870 4871 while ((count + 1) < sgt->nents) { 4872 sg_next_iter = sg_next(sg); 4873 len_next = sg_dma_len(sg_next_iter); 4874 addr_next = sg_dma_address(sg_next_iter); 4875 4876 if (len_next == 0) 4877 
break; 4878 4879 if ((addr + len == addr_next) && 4880 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 4881 len += len_next; 4882 count++; 4883 sg = sg_next_iter; 4884 } else { 4885 break; 4886 } 4887 } 4888 4889 dma_desc_cnt++; 4890 } 4891 4892 return dma_desc_cnt * sizeof(struct packet_lin_dma); 4893 } 4894 4895 static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 4896 struct hl_cs_parser *parser, 4897 struct packet_lin_dma *user_dma_pkt, 4898 u64 addr, enum dma_data_direction dir) 4899 { 4900 struct hl_userptr *userptr; 4901 int rc; 4902 4903 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4904 parser->job_userptr_list, &userptr)) 4905 goto already_pinned; 4906 4907 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 4908 if (!userptr) 4909 return -ENOMEM; 4910 4911 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4912 userptr); 4913 if (rc) 4914 goto free_userptr; 4915 4916 list_add_tail(&userptr->job_node, parser->job_userptr_list); 4917 4918 rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); 4919 if (rc) { 4920 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 4921 goto unpin_memory; 4922 } 4923 4924 userptr->dma_mapped = true; 4925 userptr->dir = dir; 4926 4927 already_pinned: 4928 parser->patched_cb_size += 4929 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 4930 4931 return 0; 4932 4933 unpin_memory: 4934 list_del(&userptr->job_node); 4935 hl_unpin_host_memory(hdev, userptr); 4936 free_userptr: 4937 kfree(userptr); 4938 return rc; 4939 } 4940 4941 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, 4942 struct hl_cs_parser *parser, 4943 struct packet_lin_dma *user_dma_pkt, 4944 bool src_in_host) 4945 { 4946 enum dma_data_direction dir; 4947 bool skip_host_mem_pin = false, user_memset; 4948 u64 addr; 4949 int rc = 0; 4950 4951 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 4952 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 4953 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 4954 4955 if (src_in_host) { 4956 if (user_memset) 4957 skip_host_mem_pin = true; 4958 4959 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 4960 dir = DMA_TO_DEVICE; 4961 addr = le64_to_cpu(user_dma_pkt->src_addr); 4962 } else { 4963 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 4964 dir = DMA_FROM_DEVICE; 4965 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4966 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4967 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4968 } 4969 4970 if (skip_host_mem_pin) 4971 parser->patched_cb_size += sizeof(*user_dma_pkt); 4972 else 4973 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 4974 addr, dir); 4975 4976 return rc; 4977 } 4978 4979 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 4980 struct hl_cs_parser *parser, 4981 struct packet_lin_dma *user_dma_pkt) 4982 { 4983 bool src_in_host = false; 4984 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4985 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4986 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4987 4988 dev_dbg(hdev->dev, "DMA packet details:\n"); 4989 dev_dbg(hdev->dev, "source == 0x%llx\n", 4990 le64_to_cpu(user_dma_pkt->src_addr)); 4991 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 4992 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); 4993 4994 /* 4995 * Special handling for DMA with size 0. 
Bypass all validations 4996 * because no transactions will be done except for WR_COMP, which 4997 * is not a security issue 4998 */ 4999 if (!le32_to_cpu(user_dma_pkt->tsize)) { 5000 parser->patched_cb_size += sizeof(*user_dma_pkt); 5001 return 0; 5002 } 5003 5004 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5005 src_in_host = true; 5006 5007 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 5008 src_in_host); 5009 } 5010 5011 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 5012 struct hl_cs_parser *parser, 5013 struct packet_load_and_exe *user_pkt) 5014 { 5015 u32 cfg; 5016 5017 cfg = le32_to_cpu(user_pkt->cfg); 5018 5019 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 5020 dev_err(hdev->dev, 5021 "User not allowed to use Load and Execute\n"); 5022 return -EPERM; 5023 } 5024 5025 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 5026 5027 return 0; 5028 } 5029 5030 static int gaudi_validate_cb(struct hl_device *hdev, 5031 struct hl_cs_parser *parser, bool is_mmu) 5032 { 5033 u32 cb_parsed_length = 0; 5034 int rc = 0; 5035 5036 parser->patched_cb_size = 0; 5037 5038 /* cb_user_size is more than 0 so loop will always be executed */ 5039 while (cb_parsed_length < parser->user_cb_size) { 5040 enum packet_id pkt_id; 5041 u16 pkt_size; 5042 struct gaudi_packet *user_pkt; 5043 5044 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5045 5046 pkt_id = (enum packet_id) ( 5047 (le64_to_cpu(user_pkt->header) & 5048 PACKET_HEADER_PACKET_ID_MASK) >> 5049 PACKET_HEADER_PACKET_ID_SHIFT); 5050 5051 if (!validate_packet_id(pkt_id)) { 5052 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5053 rc = -EINVAL; 5054 break; 5055 } 5056 5057 pkt_size = gaudi_packet_sizes[pkt_id]; 5058 cb_parsed_length += pkt_size; 5059 if (cb_parsed_length > parser->user_cb_size) { 5060 dev_err(hdev->dev, 5061 "packet 0x%x is out of CB boundary\n", pkt_id); 5062 rc = -EINVAL; 5063 break; 5064 } 5065 5066 switch (pkt_id) { 5067 case PACKET_MSG_PROT: 5068 dev_err(hdev->dev, 5069 "User not allowed to use MSG_PROT\n"); 5070 rc = -EPERM; 5071 break; 5072 5073 case PACKET_CP_DMA: 5074 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5075 rc = -EPERM; 5076 break; 5077 5078 case PACKET_STOP: 5079 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5080 rc = -EPERM; 5081 break; 5082 5083 case PACKET_WREG_BULK: 5084 dev_err(hdev->dev, 5085 "User not allowed to use WREG_BULK\n"); 5086 rc = -EPERM; 5087 break; 5088 5089 case PACKET_LOAD_AND_EXE: 5090 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 5091 (struct packet_load_and_exe *) user_pkt); 5092 break; 5093 5094 case PACKET_LIN_DMA: 5095 parser->contains_dma_pkt = true; 5096 if (is_mmu) 5097 parser->patched_cb_size += pkt_size; 5098 else 5099 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 5100 (struct packet_lin_dma *) user_pkt); 5101 break; 5102 5103 case PACKET_WREG_32: 5104 case PACKET_MSG_LONG: 5105 case PACKET_MSG_SHORT: 5106 case PACKET_REPEAT: 5107 case PACKET_FENCE: 5108 case PACKET_NOP: 5109 case PACKET_ARB_POINT: 5110 parser->patched_cb_size += pkt_size; 5111 break; 5112 5113 default: 5114 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5115 pkt_id); 5116 rc = -EINVAL; 5117 break; 5118 } 5119 5120 if (rc) 5121 break; 5122 } 5123 5124 /* 5125 * The new CB should have space at the end for two MSG_PROT packets: 5126 * 1. Optional NOP padding for cacheline alignment 5127 * 2. A packet that will act as a completion packet 5128 * 3. 
A packet that will generate MSI interrupt 5129 */ 5130 if (parser->completion) 5131 parser->patched_cb_size += gaudi_get_patched_cb_extra_size( 5132 parser->patched_cb_size); 5133 5134 return rc; 5135 } 5136 5137 static int gaudi_patch_dma_packet(struct hl_device *hdev, 5138 struct hl_cs_parser *parser, 5139 struct packet_lin_dma *user_dma_pkt, 5140 struct packet_lin_dma *new_dma_pkt, 5141 u32 *new_dma_pkt_size) 5142 { 5143 struct hl_userptr *userptr; 5144 struct scatterlist *sg, *sg_next_iter; 5145 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 5146 u64 len, len_next; 5147 dma_addr_t dma_addr, dma_addr_next; 5148 u64 device_memory_addr, addr; 5149 enum dma_data_direction dir; 5150 struct sg_table *sgt; 5151 bool src_in_host = false; 5152 bool skip_host_mem_pin = false; 5153 bool user_memset; 5154 5155 ctl = le32_to_cpu(user_dma_pkt->ctl); 5156 5157 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5158 src_in_host = true; 5159 5160 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5161 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5162 5163 if (src_in_host) { 5164 addr = le64_to_cpu(user_dma_pkt->src_addr); 5165 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 5166 dir = DMA_TO_DEVICE; 5167 if (user_memset) 5168 skip_host_mem_pin = true; 5169 } else { 5170 addr = le64_to_cpu(user_dma_pkt->dst_addr); 5171 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 5172 dir = DMA_FROM_DEVICE; 5173 } 5174 5175 if ((!skip_host_mem_pin) && 5176 (!hl_userptr_is_pinned(hdev, addr, 5177 le32_to_cpu(user_dma_pkt->tsize), 5178 parser->job_userptr_list, &userptr))) { 5179 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 5180 addr, user_dma_pkt->tsize); 5181 return -EFAULT; 5182 } 5183 5184 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 5185 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 5186 *new_dma_pkt_size = sizeof(*user_dma_pkt); 5187 return 0; 5188 } 5189 5190 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5191 5192 sgt = userptr->sgt; 5193 dma_desc_cnt = 0; 5194 5195 for_each_sgtable_dma_sg(sgt, sg, count) { 5196 len = sg_dma_len(sg); 5197 dma_addr = sg_dma_address(sg); 5198 5199 if (len == 0) 5200 break; 5201 5202 while ((count + 1) < sgt->nents) { 5203 sg_next_iter = sg_next(sg); 5204 len_next = sg_dma_len(sg_next_iter); 5205 dma_addr_next = sg_dma_address(sg_next_iter); 5206 5207 if (len_next == 0) 5208 break; 5209 5210 if ((dma_addr + len == dma_addr_next) && 5211 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5212 len += len_next; 5213 count++; 5214 sg = sg_next_iter; 5215 } else { 5216 break; 5217 } 5218 } 5219 5220 ctl = le32_to_cpu(user_dma_pkt->ctl); 5221 if (likely(dma_desc_cnt)) 5222 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 5223 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5224 new_dma_pkt->ctl = cpu_to_le32(ctl); 5225 new_dma_pkt->tsize = cpu_to_le32(len); 5226 5227 if (dir == DMA_TO_DEVICE) { 5228 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 5229 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 5230 } else { 5231 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 5232 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 5233 } 5234 5235 if (!user_memset) 5236 device_memory_addr += len; 5237 dma_desc_cnt++; 5238 new_dma_pkt++; 5239 } 5240 5241 if (!dma_desc_cnt) { 5242 dev_err(hdev->dev, 5243 "Error of 0 SG entries when patching DMA packet\n"); 5244 return -EFAULT; 5245 } 5246 5247 /* Fix the last dma packet - wrcomp must be as user set it */ 5248 new_dma_pkt--; 5249 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 5250 5251 
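	/*
	 * One LIN_DMA packet was emitted per coalesced SG run, so the patched
	 * size reported here matches the space reserved for this user packet
	 * by gaudi_get_dma_desc_list_size() during CB validation.
	 */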
*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); 5252 5253 return 0; 5254 } 5255 5256 static int gaudi_patch_cb(struct hl_device *hdev, 5257 struct hl_cs_parser *parser) 5258 { 5259 u32 cb_parsed_length = 0; 5260 u32 cb_patched_cur_length = 0; 5261 int rc = 0; 5262 5263 /* cb_user_size is more than 0 so loop will always be executed */ 5264 while (cb_parsed_length < parser->user_cb_size) { 5265 enum packet_id pkt_id; 5266 u16 pkt_size; 5267 u32 new_pkt_size = 0; 5268 struct gaudi_packet *user_pkt, *kernel_pkt; 5269 5270 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5271 kernel_pkt = parser->patched_cb->kernel_address + 5272 cb_patched_cur_length; 5273 5274 pkt_id = (enum packet_id) ( 5275 (le64_to_cpu(user_pkt->header) & 5276 PACKET_HEADER_PACKET_ID_MASK) >> 5277 PACKET_HEADER_PACKET_ID_SHIFT); 5278 5279 if (!validate_packet_id(pkt_id)) { 5280 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5281 rc = -EINVAL; 5282 break; 5283 } 5284 5285 pkt_size = gaudi_packet_sizes[pkt_id]; 5286 cb_parsed_length += pkt_size; 5287 if (cb_parsed_length > parser->user_cb_size) { 5288 dev_err(hdev->dev, 5289 "packet 0x%x is out of CB boundary\n", pkt_id); 5290 rc = -EINVAL; 5291 break; 5292 } 5293 5294 switch (pkt_id) { 5295 case PACKET_LIN_DMA: 5296 rc = gaudi_patch_dma_packet(hdev, parser, 5297 (struct packet_lin_dma *) user_pkt, 5298 (struct packet_lin_dma *) kernel_pkt, 5299 &new_pkt_size); 5300 cb_patched_cur_length += new_pkt_size; 5301 break; 5302 5303 case PACKET_MSG_PROT: 5304 dev_err(hdev->dev, 5305 "User not allowed to use MSG_PROT\n"); 5306 rc = -EPERM; 5307 break; 5308 5309 case PACKET_CP_DMA: 5310 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5311 rc = -EPERM; 5312 break; 5313 5314 case PACKET_STOP: 5315 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5316 rc = -EPERM; 5317 break; 5318 5319 case PACKET_WREG_32: 5320 case PACKET_WREG_BULK: 5321 case PACKET_MSG_LONG: 5322 case PACKET_MSG_SHORT: 5323 case PACKET_REPEAT: 5324 case PACKET_FENCE: 5325 case PACKET_NOP: 5326 case PACKET_ARB_POINT: 5327 case PACKET_LOAD_AND_EXE: 5328 memcpy(kernel_pkt, user_pkt, pkt_size); 5329 cb_patched_cur_length += pkt_size; 5330 break; 5331 5332 default: 5333 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5334 pkt_id); 5335 rc = -EINVAL; 5336 break; 5337 } 5338 5339 if (rc) 5340 break; 5341 } 5342 5343 return rc; 5344 } 5345 5346 static int gaudi_parse_cb_mmu(struct hl_device *hdev, 5347 struct hl_cs_parser *parser) 5348 { 5349 u64 handle; 5350 u32 patched_cb_size; 5351 struct hl_cb *user_cb; 5352 int rc; 5353 5354 /* 5355 * The new CB should have space at the end for two MSG_PROT packets: 5356 * 1. Optional NOP padding for cacheline alignment 5357 * 2. A packet that will act as a completion packet 5358 * 3. 
A packet that will generate MSI interrupt 5359 */ 5360 if (parser->completion) 5361 parser->patched_cb_size = parser->user_cb_size + 5362 gaudi_get_patched_cb_extra_size(parser->user_cb_size); 5363 else 5364 parser->patched_cb_size = parser->user_cb_size; 5365 5366 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5367 parser->patched_cb_size, false, false, 5368 &handle); 5369 5370 if (rc) { 5371 dev_err(hdev->dev, 5372 "Failed to allocate patched CB for DMA CS %d\n", 5373 rc); 5374 return rc; 5375 } 5376 5377 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5378 /* hl_cb_get should never fail */ 5379 if (!parser->patched_cb) { 5380 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5381 rc = -EFAULT; 5382 goto out; 5383 } 5384 5385 /* 5386 * We are protected from overflow because the check 5387 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk() 5388 * in the common code. That check is done only if is_kernel_allocated_cb is true. 5389 * 5390 * There is no option to reach here without going through that check because: 5391 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to 5392 * an external queue. 5393 * 2. For Gaudi, we only parse CBs that were submitted to the external queues. 5394 */ 5395 memcpy(parser->patched_cb->kernel_address, 5396 parser->user_cb->kernel_address, 5397 parser->user_cb_size); 5398 5399 patched_cb_size = parser->patched_cb_size; 5400 5401 /* Validate patched CB instead of user CB */ 5402 user_cb = parser->user_cb; 5403 parser->user_cb = parser->patched_cb; 5404 rc = gaudi_validate_cb(hdev, parser, true); 5405 parser->user_cb = user_cb; 5406 5407 if (rc) { 5408 hl_cb_put(parser->patched_cb); 5409 goto out; 5410 } 5411 5412 if (patched_cb_size != parser->patched_cb_size) { 5413 dev_err(hdev->dev, "user CB size mismatch\n"); 5414 hl_cb_put(parser->patched_cb); 5415 rc = -EINVAL; 5416 goto out; 5417 } 5418 5419 out: 5420 /* 5421 * Always call cb destroy here because we still have 1 reference 5422 * to it by calling cb_get earlier. After the job will be completed, 5423 * cb_put will release it, but here we want to remove it from the 5424 * idr 5425 */ 5426 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5427 5428 return rc; 5429 } 5430 5431 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 5432 struct hl_cs_parser *parser) 5433 { 5434 u64 handle; 5435 int rc; 5436 5437 rc = gaudi_validate_cb(hdev, parser, false); 5438 5439 if (rc) 5440 goto free_userptr; 5441 5442 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5443 parser->patched_cb_size, false, false, 5444 &handle); 5445 if (rc) { 5446 dev_err(hdev->dev, 5447 "Failed to allocate patched CB for DMA CS %d\n", rc); 5448 goto free_userptr; 5449 } 5450 5451 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5452 /* hl_cb_get should never fail here */ 5453 if (!parser->patched_cb) { 5454 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5455 rc = -EFAULT; 5456 goto out; 5457 } 5458 5459 rc = gaudi_patch_cb(hdev, parser); 5460 5461 if (rc) 5462 hl_cb_put(parser->patched_cb); 5463 5464 out: 5465 /* 5466 * Always call cb destroy here because we still have 1 reference 5467 * to it by calling cb_get earlier. 
After the job will be completed, 5468 * cb_put will release it, but here we want to remove it from the 5469 * idr 5470 */ 5471 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5472 5473 free_userptr: 5474 if (rc) 5475 hl_userptr_delete_list(hdev, parser->job_userptr_list); 5476 return rc; 5477 } 5478 5479 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 5480 struct hl_cs_parser *parser) 5481 { 5482 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5483 struct gaudi_device *gaudi = hdev->asic_specific; 5484 u32 nic_queue_offset, nic_mask_q_id; 5485 5486 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) && 5487 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) { 5488 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0; 5489 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2)); 5490 5491 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) { 5492 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5493 return -EINVAL; 5494 } 5495 } 5496 5497 /* For internal queue jobs just check if CB address is valid */ 5498 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5499 parser->user_cb_size, 5500 asic_prop->sram_user_base_address, 5501 asic_prop->sram_end_address)) 5502 return 0; 5503 5504 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5505 parser->user_cb_size, 5506 asic_prop->dram_user_base_address, 5507 asic_prop->dram_end_address)) 5508 return 0; 5509 5510 /* PMMU and HPMMU addresses are equal, check only one of them */ 5511 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5512 parser->user_cb_size, 5513 asic_prop->pmmu.start_addr, 5514 asic_prop->pmmu.end_addr)) 5515 return 0; 5516 5517 dev_err(hdev->dev, 5518 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 5519 parser->user_cb, parser->user_cb_size); 5520 5521 return -EFAULT; 5522 } 5523 5524 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5525 { 5526 struct gaudi_device *gaudi = hdev->asic_specific; 5527 5528 if (parser->queue_type == QUEUE_TYPE_INT) 5529 return gaudi_parse_cb_no_ext_queue(hdev, parser); 5530 5531 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 5532 return gaudi_parse_cb_mmu(hdev, parser); 5533 else 5534 return gaudi_parse_cb_no_mmu(hdev, parser); 5535 } 5536 5537 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, 5538 u32 len, u32 original_len, u64 cq_addr, u32 cq_val, 5539 u32 msi_vec, bool eb) 5540 { 5541 struct packet_msg_prot *cq_pkt; 5542 struct packet_nop *cq_padding; 5543 u64 msi_addr; 5544 u32 tmp; 5545 5546 cq_padding = kernel_address + original_len; 5547 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 5548 5549 while ((void *)cq_padding < (void *)cq_pkt) { 5550 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP)); 5551 cq_padding++; 5552 } 5553 5554 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5555 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5556 5557 if (eb) 5558 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5559 5560 cq_pkt->ctl = cpu_to_le32(tmp); 5561 cq_pkt->value = cpu_to_le32(cq_val); 5562 cq_pkt->addr = cpu_to_le64(cq_addr); 5563 5564 cq_pkt++; 5565 5566 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5567 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5568 cq_pkt->ctl = cpu_to_le32(tmp); 5569 cq_pkt->value = cpu_to_le32(1); 5570 msi_addr = hdev->pdev ? 
mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4; 5571 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); 5572 } 5573 5574 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 5575 { 5576 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 5577 } 5578 5579 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 5580 u32 size, u64 val) 5581 { 5582 struct packet_lin_dma *lin_dma_pkt; 5583 struct hl_cs_job *job; 5584 u32 cb_size, ctl, err_cause; 5585 struct hl_cb *cb; 5586 int rc; 5587 5588 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 5589 if (!cb) 5590 return -EFAULT; 5591 5592 lin_dma_pkt = cb->kernel_address; 5593 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 5594 cb_size = sizeof(*lin_dma_pkt); 5595 5596 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 5597 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 5598 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 5599 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5600 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5601 5602 lin_dma_pkt->ctl = cpu_to_le32(ctl); 5603 lin_dma_pkt->src_addr = cpu_to_le64(val); 5604 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 5605 lin_dma_pkt->tsize = cpu_to_le32(size); 5606 5607 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5608 if (!job) { 5609 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5610 rc = -ENOMEM; 5611 goto release_cb; 5612 } 5613 5614 /* Verify DMA is OK */ 5615 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5616 if (err_cause && !hdev->init_done) { 5617 dev_dbg(hdev->dev, 5618 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5619 err_cause); 5620 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5621 } 5622 5623 job->id = 0; 5624 job->user_cb = cb; 5625 atomic_inc(&job->user_cb->cs_cnt); 5626 job->user_cb_size = cb_size; 5627 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5628 job->patched_cb = job->user_cb; 5629 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 5630 5631 hl_debugfs_add_job(hdev, job); 5632 5633 rc = gaudi_send_job_on_qman0(hdev, job); 5634 hl_debugfs_remove_job(hdev, job); 5635 kfree(job); 5636 atomic_dec(&cb->cs_cnt); 5637 5638 /* Verify DMA is OK */ 5639 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5640 if (err_cause) { 5641 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5642 rc = -EIO; 5643 if (!hdev->init_done) { 5644 dev_dbg(hdev->dev, 5645 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5646 err_cause); 5647 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5648 } 5649 } 5650 5651 release_cb: 5652 hl_cb_put(cb); 5653 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5654 5655 return rc; 5656 } 5657 5658 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 5659 u32 num_regs, u32 val) 5660 { 5661 struct packet_msg_long *pkt; 5662 struct hl_cs_job *job; 5663 u32 cb_size, ctl; 5664 struct hl_cb *cb; 5665 int i, rc; 5666 5667 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot); 5668 5669 if (cb_size > SZ_2M) { 5670 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M >> 20); 5671 return -ENOMEM; 5672 } 5673 5674 cb = hl_cb_kernel_create(hdev, cb_size, false); 5675 if (!cb) 5676 return -EFAULT; 5677 5678 pkt = cb->kernel_address; 5679 5680 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ 5681 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); 5682 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5683 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5684 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5685 5686 for (i = 0; i < num_regs ; i++, pkt++) {
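/* Emit one MSG_LONG packet per register: every packet carries the same ctl and value, and only the target address advances, 4 bytes per register (reg_base, reg_base + 4, ...). */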
5687 pkt->ctl = cpu_to_le32(ctl); 5688 pkt->value = cpu_to_le32(val); 5689 pkt->addr = cpu_to_le64(reg_base + (i * 4)); 5690 } 5691 5692 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5693 if (!job) { 5694 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5695 rc = -ENOMEM; 5696 goto release_cb; 5697 } 5698 5699 job->id = 0; 5700 job->user_cb = cb; 5701 atomic_inc(&job->user_cb->cs_cnt); 5702 job->user_cb_size = cb_size; 5703 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5704 job->patched_cb = job->user_cb; 5705 job->job_cb_size = cb_size; 5706 5707 hl_debugfs_add_job(hdev, job); 5708 5709 rc = gaudi_send_job_on_qman0(hdev, job); 5710 hl_debugfs_remove_job(hdev, job); 5711 kfree(job); 5712 atomic_dec(&cb->cs_cnt); 5713 5714 release_cb: 5715 hl_cb_put(cb); 5716 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5717 5718 return rc; 5719 } 5720 5721 static int gaudi_restore_sm_registers(struct hl_device *hdev) 5722 { 5723 u64 base_addr; 5724 u32 num_regs; 5725 int rc; 5726 5727 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5728 num_regs = NUM_OF_SOB_IN_BLOCK; 5729 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5730 if (rc) { 5731 dev_err(hdev->dev, "failed resetting SM registers"); 5732 return -ENOMEM; 5733 } 5734 5735 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0; 5736 num_regs = NUM_OF_SOB_IN_BLOCK; 5737 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5738 if (rc) { 5739 dev_err(hdev->dev, "failed resetting SM registers"); 5740 return -ENOMEM; 5741 } 5742 5743 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5744 num_regs = NUM_OF_SOB_IN_BLOCK; 5745 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5746 if (rc) { 5747 dev_err(hdev->dev, "failed resetting SM registers"); 5748 return -ENOMEM; 5749 } 5750 5751 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5752 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5753 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5754 if (rc) { 5755 dev_err(hdev->dev, "failed resetting SM registers"); 5756 return -ENOMEM; 5757 } 5758 5759 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0; 5760 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5761 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5762 if (rc) { 5763 dev_err(hdev->dev, "failed resetting SM registers"); 5764 return -ENOMEM; 5765 } 5766 5767 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5768 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5769 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5770 if (rc) { 5771 dev_err(hdev->dev, "failed resetting SM registers"); 5772 return -ENOMEM; 5773 } 5774 5775 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5776 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4); 5777 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT; 5778 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5779 if (rc) { 5780 dev_err(hdev->dev, "failed resetting SM registers"); 5781 return -ENOMEM; 5782 } 5783 5784 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + 5785 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4); 5786 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR; 5787 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5788 if (rc) { 5789 dev_err(hdev->dev, "failed resetting SM registers"); 5790 return -ENOMEM; 5791 } 5792 5793 return 0; 5794 } 5795 5796 static void gaudi_restore_dma_registers(struct hl_device *hdev) 5797 { 5798 u32 sob_delta = 
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 5799 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5800 int i; 5801 5802 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5803 u64 sob_addr = CFG_BASE + 5804 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5805 (i * sob_delta); 5806 u32 dma_offset = i * DMA_CORE_OFFSET; 5807 5808 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, 5809 lower_32_bits(sob_addr)); 5810 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 5811 upper_32_bits(sob_addr)); 5812 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 5813 5814 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 5815 * modified by the user for SRAM reduction 5816 */ 5817 if (i > 1) 5818 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 5819 0x00000001); 5820 } 5821 } 5822 5823 static void gaudi_restore_qm_registers(struct hl_device *hdev) 5824 { 5825 u32 qman_offset; 5826 int i; 5827 5828 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5829 qman_offset = i * DMA_QMAN_OFFSET; 5830 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 5831 } 5832 5833 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 5834 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 5835 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 5836 } 5837 5838 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 5839 qman_offset = i * TPC_QMAN_OFFSET; 5840 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 5841 } 5842 5843 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 5844 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET + 5845 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET; 5846 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0); 5847 } 5848 } 5849 5850 static int gaudi_restore_user_registers(struct hl_device *hdev) 5851 { 5852 int rc; 5853 5854 rc = gaudi_restore_sm_registers(hdev); 5855 if (rc) 5856 return rc; 5857 5858 gaudi_restore_dma_registers(hdev); 5859 gaudi_restore_qm_registers(hdev); 5860 5861 return 0; 5862 } 5863 5864 static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 5865 { 5866 return 0; 5867 } 5868 5869 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 5870 { 5871 u32 size = hdev->asic_prop.mmu_pgt_size + 5872 hdev->asic_prop.mmu_cache_mng_size; 5873 struct gaudi_device *gaudi = hdev->asic_specific; 5874 u64 addr = hdev->asic_prop.mmu_pgt_addr; 5875 5876 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 5877 return 0; 5878 5879 return gaudi_memset_device_memory(hdev, addr, size, 0); 5880 } 5881 5882 static void gaudi_restore_phase_topology(struct hl_device *hdev) 5883 { 5884 5885 } 5886 5887 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, 5888 u32 size_to_dma, dma_addr_t dma_addr) 5889 { 5890 u32 err_cause, val; 5891 u64 dma_offset; 5892 int rc; 5893 5894 dma_offset = dma_id * DMA_CORE_OFFSET; 5895 5896 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); 5897 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); 5898 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); 5899 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); 5900 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); 5901 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 5902 (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); 5903 5904 rc = hl_poll_timeout( 5905 hdev, 5906 mmDMA0_CORE_STS0 + dma_offset, 5907 val, 5908 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), 5909 0, 5910 1000000); 5911 5912 if (rc) { 5913 dev_err(hdev->dev, 5914 "DMA %d timed-out during reading of 0x%llx\n", 5915 dma_id, addr); 5916 return -EIO; 5917 } 5918 5919 /* Verify DMA is OK */ 5920 err_cause = 
RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5921 if (err_cause) { 5922 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5923 dev_dbg(hdev->dev, 5924 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5925 err_cause); 5926 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5927 5928 return -EIO; 5929 } 5930 5931 return 0; 5932 } 5933 5934 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, 5935 void *blob_addr) 5936 { 5937 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; 5938 u32 qm_glbl_sts0, qm_cgm_sts; 5939 u64 dma_offset, qm_offset; 5940 dma_addr_t dma_addr; 5941 void *kernel_addr; 5942 bool is_eng_idle; 5943 int rc = 0, dma_id; 5944 5945 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO); 5946 5947 if (!kernel_addr) 5948 return -ENOMEM; 5949 5950 hdev->asic_funcs->hw_queues_lock(hdev); 5951 5952 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 5953 dma_offset = dma_id * DMA_CORE_OFFSET; 5954 qm_offset = dma_id * DMA_QMAN_OFFSET; 5955 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5956 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5957 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5958 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5959 IS_DMA_IDLE(dma_core_sts0); 5960 5961 if (!is_eng_idle) { 5962 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 5963 dma_offset = dma_id * DMA_CORE_OFFSET; 5964 qm_offset = dma_id * DMA_QMAN_OFFSET; 5965 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5966 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5967 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5968 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5969 IS_DMA_IDLE(dma_core_sts0); 5970 5971 if (!is_eng_idle) { 5972 dev_err_ratelimited(hdev->dev, 5973 "Can't read via DMA because it is BUSY\n"); 5974 rc = -EAGAIN; 5975 goto out; 5976 } 5977 } 5978 5979 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); 5980 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, 5981 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 5982 5983 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 5984 * using the compute ctx ASID, if exists. If not, use the kernel ctx 5985 * ASID 5986 */ 5987 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 5988 5989 /* Verify DMA is OK */ 5990 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5991 if (err_cause) { 5992 dev_dbg(hdev->dev, 5993 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5994 err_cause); 5995 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5996 } 5997 5998 pos = 0; 5999 size_left = size; 6000 size_to_dma = SZ_2M; 6001 6002 while (size_left > 0) { 6003 6004 if (size_left < SZ_2M) 6005 size_to_dma = size_left; 6006 6007 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, 6008 dma_addr); 6009 if (rc) 6010 break; 6011 6012 memcpy(blob_addr + pos, kernel_addr, size_to_dma); 6013 6014 if (size_left <= SZ_2M) 6015 break; 6016 6017 pos += SZ_2M; 6018 addr += SZ_2M; 6019 size_left -= SZ_2M; 6020 } 6021 6022 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6023 * using the compute ctx ASID, if exists. 
If not, use the kernel ctx 6024 * ASID 6025 */ 6026 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 6027 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6028 6029 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); 6030 6031 out: 6032 hdev->asic_funcs->hw_queues_unlock(hdev); 6033 6034 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); 6035 6036 return rc; 6037 } 6038 6039 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 6040 { 6041 struct gaudi_device *gaudi = hdev->asic_specific; 6042 6043 if (hdev->reset_info.hard_reset_pending) 6044 return U64_MAX; 6045 6046 return readq(hdev->pcie_bar[HBM_BAR_ID] + 6047 (addr - gaudi->hbm_bar_cur_addr)); 6048 } 6049 6050 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 6051 { 6052 struct gaudi_device *gaudi = hdev->asic_specific; 6053 6054 if (hdev->reset_info.hard_reset_pending) 6055 return; 6056 6057 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 6058 (addr - gaudi->hbm_bar_cur_addr)); 6059 } 6060 6061 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 6062 { 6063 /* mask to zero the MMBP and ASID bits */ 6064 WREG32_AND(reg, ~0x7FF); 6065 WREG32_OR(reg, asid); 6066 } 6067 6068 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 6069 { 6070 struct gaudi_device *gaudi = hdev->asic_specific; 6071 6072 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6073 return; 6074 6075 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 6076 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6077 return; 6078 } 6079 6080 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6081 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6082 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6083 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6084 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6085 6086 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6087 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6088 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6089 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6090 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6091 6092 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6093 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6094 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6095 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6096 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6097 6098 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6099 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6100 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6101 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6102 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6103 6104 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6105 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6106 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6107 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6108 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6109 6110 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6111 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, 
asid); 6112 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6113 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6114 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6115 6116 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6117 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6118 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6119 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6120 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6121 6122 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6123 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6124 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6125 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6126 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6127 6128 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 6129 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 6130 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 6131 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 6132 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 6133 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 6134 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 6135 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 6136 6137 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6138 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6139 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6140 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6141 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6142 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 6143 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 6144 6145 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6146 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6147 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6148 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6149 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6150 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 6151 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 6152 6153 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6154 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6155 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6156 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6157 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6158 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 6159 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 6160 6161 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6162 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6163 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6164 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6165 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6166 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 6167 gaudi_mmu_prepare_reg(hdev, 
mmTPC3_CFG_AWUSER_LO, asid); 6168 6169 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6170 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6171 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6172 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6173 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6174 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 6175 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 6176 6177 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6178 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6179 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6180 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6181 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6182 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 6183 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 6184 6185 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6186 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6187 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6188 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6189 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6190 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 6191 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 6192 6193 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6194 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6195 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6196 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6197 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6198 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 6199 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 6200 6201 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6202 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6203 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6204 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6205 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6206 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6207 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6208 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6209 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6210 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6211 6212 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); 6213 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 6214 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 6215 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 6216 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 6217 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 6218 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 6219 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 6220 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 6221 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 6222 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 6223 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 6224 6225 if 
(gaudi->hw_cap_initialized & HW_CAP_NIC0) { 6226 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, 6227 asid); 6228 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, 6229 asid); 6230 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, 6231 asid); 6232 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, 6233 asid); 6234 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, 6235 asid); 6236 } 6237 6238 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) { 6239 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, 6240 asid); 6241 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, 6242 asid); 6243 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, 6244 asid); 6245 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, 6246 asid); 6247 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, 6248 asid); 6249 } 6250 6251 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) { 6252 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, 6253 asid); 6254 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, 6255 asid); 6256 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, 6257 asid); 6258 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, 6259 asid); 6260 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, 6261 asid); 6262 } 6263 6264 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) { 6265 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, 6266 asid); 6267 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, 6268 asid); 6269 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, 6270 asid); 6271 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, 6272 asid); 6273 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, 6274 asid); 6275 } 6276 6277 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) { 6278 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, 6279 asid); 6280 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, 6281 asid); 6282 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, 6283 asid); 6284 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, 6285 asid); 6286 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, 6287 asid); 6288 } 6289 6290 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) { 6291 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, 6292 asid); 6293 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, 6294 asid); 6295 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, 6296 asid); 6297 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, 6298 asid); 6299 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, 6300 asid); 6301 } 6302 6303 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) { 6304 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, 6305 asid); 6306 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, 6307 asid); 6308 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, 6309 asid); 6310 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, 6311 asid); 6312 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, 6313 asid); 6314 } 6315 6316 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) { 6317 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, 6318 asid); 6319 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, 6320 asid); 6321 gaudi_mmu_prepare_reg(hdev, 
mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, 6322 asid); 6323 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, 6324 asid); 6325 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, 6326 asid); 6327 } 6328 6329 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) { 6330 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, 6331 asid); 6332 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, 6333 asid); 6334 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, 6335 asid); 6336 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, 6337 asid); 6338 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, 6339 asid); 6340 } 6341 6342 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) { 6343 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, 6344 asid); 6345 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, 6346 asid); 6347 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, 6348 asid); 6349 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, 6350 asid); 6351 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, 6352 asid); 6353 } 6354 6355 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 6356 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 6357 } 6358 6359 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 6360 struct hl_cs_job *job) 6361 { 6362 struct packet_msg_prot *fence_pkt; 6363 u32 *fence_ptr; 6364 dma_addr_t fence_dma_addr; 6365 struct hl_cb *cb; 6366 u32 tmp, timeout, dma_offset; 6367 int rc; 6368 6369 if (hdev->pldm) 6370 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 6371 else 6372 timeout = HL_DEVICE_TIMEOUT_USEC; 6373 6374 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 6375 if (!fence_ptr) { 6376 dev_err(hdev->dev, 6377 "Failed to allocate fence memory for QMAN0\n"); 6378 return -ENOMEM; 6379 } 6380 6381 cb = job->patched_cb; 6382 6383 fence_pkt = cb->kernel_address + 6384 job->job_cb_size - sizeof(struct packet_msg_prot); 6385 6386 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 6387 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 6388 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 6389 6390 fence_pkt->ctl = cpu_to_le32(tmp); 6391 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 6392 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 6393 6394 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 6395 6396 WREG32(mmDMA0_CORE_PROT + dma_offset, 6397 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6398 6399 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 6400 job->job_cb_size, cb->bus_address); 6401 if (rc) { 6402 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 6403 goto free_fence_ptr; 6404 } 6405 6406 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 6407 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 6408 timeout, true); 6409 6410 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 6411 6412 if (rc == -ETIMEDOUT) { 6413 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 6414 goto free_fence_ptr; 6415 } 6416 6417 free_fence_ptr: 6418 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); 6419 6420 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 6421 return rc; 6422 } 6423 6424 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 6425 { 6426 if (event_type >= GAUDI_EVENT_SIZE) 6427 goto event_not_supported; 6428 6429 if (!gaudi_irq_map_table[event_type].valid) 6430 goto 
event_not_supported; 6431 6432 snprintf(desc, size, gaudi_irq_map_table[event_type].name); 6433 6434 return; 6435 6436 event_not_supported: 6437 snprintf(desc, size, "N/A"); 6438 } 6439 6440 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, 6441 bool is_write, u16 *engine_id_1, 6442 u16 *engine_id_2) 6443 { 6444 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 6445 6446 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 6447 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 6448 6449 switch (x_y) { 6450 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6451 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6452 dma_id[0] = 0; 6453 dma_id[1] = 2; 6454 break; 6455 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6456 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6457 dma_id[0] = 1; 6458 dma_id[1] = 3; 6459 break; 6460 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6461 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6462 dma_id[0] = 4; 6463 dma_id[1] = 6; 6464 break; 6465 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6466 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6467 dma_id[0] = 5; 6468 dma_id[1] = 7; 6469 break; 6470 default: 6471 goto unknown_initiator; 6472 } 6473 6474 for (i = 0 ; i < 2 ; i++) { 6475 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 6476 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6477 } 6478 6479 switch (x_y) { 6480 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6481 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6482 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6483 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6484 return "DMA0"; 6485 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6486 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; 6487 return "DMA2"; 6488 } else { 6489 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6490 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; 6491 return "DMA0 or DMA2"; 6492 } 6493 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6494 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6495 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6496 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6497 return "DMA1"; 6498 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6499 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; 6500 return "DMA3"; 6501 } else { 6502 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6503 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; 6504 return "DMA1 or DMA3"; 6505 } 6506 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6507 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6508 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6509 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6510 return "DMA4"; 6511 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6512 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; 6513 return "DMA6"; 6514 } else { 6515 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6516 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; 6517 return "DMA4 or DMA6"; 6518 } 6519 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6520 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6521 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6522 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6523 return "DMA5"; 6524 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6525 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; 6526 return "DMA7"; 6527 } else { 6528 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6529 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; 6530 return "DMA5 or DMA7"; 6531 } 6532 } 6533 6534 unknown_initiator: 6535 return "unknown initiator"; 6536 } 6537 6538 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, 6539 u16 *engine_id_1, u16 *engine_id_2) 6540 { 6541 u32 val, x_y, axi_id; 6542 6543 val = is_write ? 
RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 6544 RREG32(mmMMU_UP_RAZWI_READ_ID); 6545 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 6546 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 6547 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 6548 RAZWI_INITIATOR_AXI_ID_SHIFT); 6549 6550 switch (x_y) { 6551 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 6552 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6553 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; 6554 return "TPC0"; 6555 } 6556 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6557 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; 6558 return "NIC0"; 6559 } 6560 break; 6561 case RAZWI_INITIATOR_ID_X_Y_TPC1: 6562 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; 6563 return "TPC1"; 6564 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 6565 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 6566 *engine_id_1 = GAUDI_ENGINE_ID_MME_0; 6567 return "MME0"; 6568 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 6569 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 6570 *engine_id_1 = GAUDI_ENGINE_ID_MME_1; 6571 return "MME1"; 6572 case RAZWI_INITIATOR_ID_X_Y_TPC2: 6573 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; 6574 return "TPC2"; 6575 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 6576 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6577 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; 6578 return "TPC3"; 6579 } 6580 /* PCI, CPU or PSOC does not have engine id*/ 6581 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 6582 return "PCI"; 6583 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 6584 return "CPU"; 6585 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 6586 return "PSOC"; 6587 break; 6588 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6589 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6590 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6591 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6592 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6593 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6594 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6595 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6596 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, 6597 engine_id_1, engine_id_2); 6598 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 6599 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6600 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; 6601 return "TPC4"; 6602 } 6603 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6604 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; 6605 return "NIC1"; 6606 } 6607 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6608 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; 6609 return "NIC2"; 6610 } 6611 break; 6612 case RAZWI_INITIATOR_ID_X_Y_TPC5: 6613 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; 6614 return "TPC5"; 6615 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 6616 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 6617 *engine_id_1 = GAUDI_ENGINE_ID_MME_2; 6618 return "MME2"; 6619 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 6620 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 6621 *engine_id_1 = GAUDI_ENGINE_ID_MME_3; 6622 return "MME3"; 6623 case RAZWI_INITIATOR_ID_X_Y_TPC6: 6624 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; 6625 return "TPC6"; 6626 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 6627 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6628 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; 6629 return "TPC7"; 6630 } 6631 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6632 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; 6633 return "NIC4"; 6634 } 6635 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6636 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; 6637 return "NIC5"; 6638 } 6639 break; 6640 default: 6641 break; 6642 } 6643 6644 dev_err(hdev->dev, 6645 
"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 6646 val, 6647 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 6648 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 6649 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 6650 RAZWI_INITIATOR_AXI_ID_MASK); 6651 6652 return "unknown initiator"; 6653 } 6654 6655 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1, 6656 u16 *engine_id_2, bool *is_read, bool *is_write) 6657 { 6658 6659 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 6660 dev_err_ratelimited(hdev->dev, 6661 "RAZWI event caused by illegal write of %s\n", 6662 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); 6663 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 6664 *is_write = true; 6665 } 6666 6667 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 6668 dev_err_ratelimited(hdev->dev, 6669 "RAZWI event caused by illegal read of %s\n", 6670 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); 6671 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 6672 *is_read = true; 6673 } 6674 } 6675 6676 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) 6677 { 6678 struct gaudi_device *gaudi = hdev->asic_specific; 6679 u32 val; 6680 6681 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6682 return; 6683 6684 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 6685 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6686 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 6687 *addr <<= 32; 6688 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 6689 6690 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); 6691 hl_handle_page_fault(hdev, *addr, 0, true, event_mask); 6692 6693 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 6694 } 6695 6696 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 6697 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6698 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 6699 *addr <<= 32; 6700 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 6701 6702 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); 6703 6704 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 6705 } 6706 } 6707 6708 /* 6709 * +-------------------+------------------------------------------------------+ 6710 * | Configuration Reg | Description | 6711 * | Address | | 6712 * +-------------------+------------------------------------------------------+ 6713 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| 6714 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 6715 * | |0xF34 memory wrappers 63:32 | 6716 * | |0xF38 memory wrappers 95:64 | 6717 * | |0xF3C memory wrappers 127:96 | 6718 * +-------------------+------------------------------------------------------+ 6719 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 6720 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 6721 * | |0xF44 memory wrappers 63:32 | 6722 * | |0xF48 memory wrappers 95:64 | 6723 * | |0xF4C memory wrappers 127:96 | 6724 * +-------------------+------------------------------------------------------+ 6725 */ 6726 static int gaudi_extract_ecc_info(struct hl_device *hdev, 6727 struct ecc_info_extract_params *params, u64 *ecc_address, 6728 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 6729 { 6730 u32 i, num_mem_regs, reg, err_bit; 6731 u64 err_addr, err_word = 0; 6732 6733 num_mem_regs = params->num_memories / 32 + 6734 ((params->num_memories % 32) ? 
1 : 0); 6735 6736 if (params->block_address >= CFG_BASE) 6737 params->block_address -= CFG_BASE; 6738 6739 if (params->derr) 6740 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 6741 else 6742 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 6743 6744 /* Set invalid wrapper index */ 6745 *memory_wrapper_idx = 0xFF; 6746 6747 /* Iterate through memory wrappers, a single bit must be set */ 6748 for (i = 0 ; i < num_mem_regs ; i++) { 6749 /* indication registers are consecutive 32-bit registers */ 6750 err_word = RREG32(err_addr + i * 4); 6751 if (err_word) { 6752 err_bit = __ffs(err_word); 6753 *memory_wrapper_idx = err_bit + (32 * i); 6754 break; 6755 } 6756 } 6757 6758 if (*memory_wrapper_idx == 0xFF) { 6759 dev_err(hdev->dev, "ECC error information cannot be found\n"); 6760 return -EINVAL; 6761 } 6762 6763 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 6764 *memory_wrapper_idx); 6765 6766 *ecc_address = 6767 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 6768 *ecc_syndrom = 6769 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 6770 6771 /* Clear error indication */ 6772 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 6773 if (params->derr) 6774 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 6775 else 6776 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 6777 6778 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 6779 6780 return 0; 6781 } 6782 6783 /* 6784 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6785 * 6786 * @idx: the current pi/ci value 6787 * @q_len: the queue length (power of 2) 6788 * 6789 * @return the cyclically decremented index 6790 */ 6791 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) 6792 { 6793 u32 mask = q_len - 1; 6794 6795 /* 6796 * modular decrement is equivalent to adding (q_len - 1); 6797 * we then take the LSBs to make sure the value is in the 6798 * range [0, q_len - 1] 6799 */ 6800 return (idx + q_len - 1) & mask; 6801 } 6802 6803 /** 6804 * gaudi_handle_sw_config_stream_data - print SW config stream data 6805 * 6806 * @hdev: pointer to the habanalabs device structure 6807 * @stream: the QMAN's stream 6808 * @qman_base: base address of QMAN registers block 6809 * @event_mask: mask of the last events occurred 6810 */ 6811 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream, 6812 u64 qman_base, u64 event_mask) 6813 { 6814 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6815 u32 cq_ptr_lo_off, size; 6816 6817 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0; 6818 6819 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 6820 stream * cq_ptr_lo_off; 6821 cq_ptr_hi = cq_ptr_lo + 6822 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0); 6823 cq_tsize = cq_ptr_lo + 6824 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0); 6825 6826 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6827 size = RREG32(cq_tsize); 6828 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", 6829 stream, cq_ptr, size); 6830 6831 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6832 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; 6833 hdev->captured_err_info.undef_opcode.cq_size = size; 6834 hdev->captured_err_info.undef_opcode.stream_id = stream; 6835 } 6836 } 6837 6838 /** 6839 * gaudi_handle_last_pqes_on_err - print last PQEs on error 6840 * 6841 * @hdev: pointer to the habanalabs device structure 6842 * @qid_base: first QID of the QMAN (out of 4 streams) 6843 * @stream: the QMAN's stream 6844 * @qman_base:
base address of QMAN registers block 6845 * @event_mask: mask of the last events occurred 6846 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 6847 */ 6848 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, 6849 u32 stream, u64 qman_base, 6850 u64 event_mask, 6851 bool pr_sw_conf) 6852 { 6853 u32 ci, qm_ci_stream_off, queue_len; 6854 struct hl_hw_queue *q; 6855 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE]; 6856 int i; 6857 6858 q = &hdev->kernel_queues[qid_base + stream]; 6859 6860 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0; 6861 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) + 6862 stream * qm_ci_stream_off; 6863 6864 queue_len = (q->queue_type == QUEUE_TYPE_INT) ? 6865 q->int_queue_len : HL_QUEUE_LENGTH; 6866 6867 hdev->asic_funcs->hw_queues_lock(hdev); 6868 6869 if (pr_sw_conf) 6870 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6871 6872 ci = RREG32(pq_ci); 6873 6874 /* we should start printing from ci - 1 */ 6875 ci = gaudi_queue_idx_dec(ci, queue_len); 6876 memset(addr, 0, sizeof(addr)); 6877 6878 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6879 struct hl_bd *bd; 6880 u32 len; 6881 6882 bd = q->kernel_address; 6883 bd += ci; 6884 6885 len = le32_to_cpu(bd->len); 6886 /* len 0 means uninitialized entry - break */ 6887 if (!len) 6888 break; 6889 6890 addr[i] = le64_to_cpu(bd->ptr); 6891 6892 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", 6893 stream, ci, addr[i], len); 6894 6895 /* get previous ci, wrap if needed */ 6896 ci = gaudi_queue_idx_dec(ci, queue_len); 6897 } 6898 6899 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6900 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; 6901 u32 arr_idx = undef_opcode->cb_addr_streams_len; 6902 6903 if (arr_idx == 0) { 6904 undef_opcode->timestamp = ktime_get(); 6905 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base]; 6906 } 6907 6908 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr)); 6909 undef_opcode->cb_addr_streams_len++; 6910 } 6911 6912 hdev->asic_funcs->hw_queues_unlock(hdev); 6913 } 6914 6915 /** 6916 * handle_qman_data_on_err - extract QMAN data on error 6917 * 6918 * @hdev: pointer to the habanalabs device structure 6919 * @qid_base: first QID of the QMAN (out of 4 streams) 6920 * @stream: the QMAN's stream 6921 * @qman_base: base address of QMAN registers block 6922 * @event_mask: mask of the last events occurred 6923 * 6924 * This function attempts to extract as much data as possible on a QMAN error. 6925 * On upper CP print the SW config stream data and last 8 PQEs.
6926 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 6927 */ 6928 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base, 6929 u32 stream, u64 qman_base, u64 event_mask) 6930 { 6931 u32 i; 6932 6933 if (stream != QMAN_STREAMS) { 6934 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, 6935 qman_base, event_mask, true); 6936 return; 6937 } 6938 6939 /* handle Lower-CP */ 6940 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6941 6942 for (i = 0; i < QMAN_STREAMS; i++) 6943 gaudi_handle_last_pqes_on_err(hdev, qid_base, i, 6944 qman_base, event_mask, false); 6945 } 6946 6947 static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 6948 const char *qm_name, 6949 u64 qman_base, 6950 u32 qid_base, 6951 u64 *event_mask) 6952 { 6953 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 6954 u64 glbl_sts_addr, arb_err_addr; 6955 char reg_desc[32]; 6956 6957 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE); 6958 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE); 6959 6960 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 6961 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 6962 glbl_sts_clr_val = 0; 6963 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 6964 6965 if (!glbl_sts_val) 6966 continue; 6967 6968 if (i == QMAN_STREAMS) 6969 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 6970 else 6971 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 6972 6973 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 6974 if (glbl_sts_val & BIT(j)) { 6975 dev_err_ratelimited(hdev->dev, 6976 "%s %s. err cause: %s\n", 6977 qm_name, reg_desc, 6978 gaudi_qman_error_cause[j]); 6979 glbl_sts_clr_val |= BIT(j); 6980 } 6981 } 6982 /* check for undefined opcode */ 6983 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && 6984 hdev->captured_err_info.undef_opcode.write_enable) { 6985 memset(&hdev->captured_err_info.undef_opcode, 0, 6986 sizeof(hdev->captured_err_info.undef_opcode)); 6987 6988 hdev->captured_err_info.undef_opcode.write_enable = false; 6989 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; 6990 } 6991 6992 /* Write 1 clear errors */ 6993 if (!hdev->stop_on_err) 6994 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 6995 else 6996 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask); 6997 } 6998 6999 arb_err_val = RREG32(arb_err_addr); 7000 7001 if (!arb_err_val) 7002 return; 7003 7004 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 7005 if (arb_err_val & BIT(j)) { 7006 dev_err_ratelimited(hdev->dev, 7007 "%s ARB_ERR. 
err cause: %s\n", 7008 qm_name, 7009 gaudi_qman_arb_error_cause[j]); 7010 } 7011 } 7012 } 7013 7014 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, 7015 struct hl_eq_sm_sei_data *sei_data) 7016 { 7017 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; 7018 7019 /* Flip the bits as the enum is ordered in the opposite way */ 7020 index = (index ^ 0x3) & 0x3; 7021 7022 switch (sei_data->sei_cause) { 7023 case SM_SEI_SO_OVERFLOW: 7024 dev_err_ratelimited(hdev->dev, 7025 "%s SEI Error: SOB Group %u overflow/underflow", 7026 gaudi_sync_manager_names[index], 7027 le32_to_cpu(sei_data->sei_log)); 7028 break; 7029 case SM_SEI_LBW_4B_UNALIGNED: 7030 dev_err_ratelimited(hdev->dev, 7031 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", 7032 gaudi_sync_manager_names[index], 7033 le32_to_cpu(sei_data->sei_log)); 7034 break; 7035 case SM_SEI_AXI_RESPONSE_ERR: 7036 dev_err_ratelimited(hdev->dev, 7037 "%s SEI Error: AXI ID %u response error", 7038 gaudi_sync_manager_names[index], 7039 le32_to_cpu(sei_data->sei_log)); 7040 break; 7041 default: 7042 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", 7043 le32_to_cpu(sei_data->sei_log)); 7044 break; 7045 } 7046 } 7047 7048 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7049 struct hl_eq_ecc_data *ecc_data) 7050 { 7051 struct ecc_info_extract_params params; 7052 u64 ecc_address = 0, ecc_syndrom = 0; 7053 u8 index, memory_wrapper_idx = 0; 7054 bool extract_info_from_fw; 7055 int rc; 7056 7057 if (hdev->asic_prop.fw_security_enabled) { 7058 extract_info_from_fw = true; 7059 goto extract_ecc_info; 7060 } 7061 7062 switch (event_type) { 7063 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 7064 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: 7065 extract_info_from_fw = true; 7066 break; 7067 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7068 index = event_type - GAUDI_EVENT_TPC0_SERR; 7069 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7070 params.num_memories = 90; 7071 params.derr = false; 7072 extract_info_from_fw = false; 7073 break; 7074 case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: 7075 index = event_type - GAUDI_EVENT_TPC0_DERR; 7076 params.block_address = 7077 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7078 params.num_memories = 90; 7079 params.derr = true; 7080 extract_info_from_fw = false; 7081 break; 7082 case GAUDI_EVENT_MME0_ACC_SERR: 7083 case GAUDI_EVENT_MME1_ACC_SERR: 7084 case GAUDI_EVENT_MME2_ACC_SERR: 7085 case GAUDI_EVENT_MME3_ACC_SERR: 7086 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; 7087 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7088 params.num_memories = 128; 7089 params.derr = false; 7090 extract_info_from_fw = false; 7091 break; 7092 case GAUDI_EVENT_MME0_ACC_DERR: 7093 case GAUDI_EVENT_MME1_ACC_DERR: 7094 case GAUDI_EVENT_MME2_ACC_DERR: 7095 case GAUDI_EVENT_MME3_ACC_DERR: 7096 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; 7097 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7098 params.num_memories = 128; 7099 params.derr = true; 7100 extract_info_from_fw = false; 7101 break; 7102 case GAUDI_EVENT_MME0_SBAB_SERR: 7103 case GAUDI_EVENT_MME1_SBAB_SERR: 7104 case GAUDI_EVENT_MME2_SBAB_SERR: 7105 case GAUDI_EVENT_MME3_SBAB_SERR: 7106 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; 7107 params.block_address = 7108 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7109 params.num_memories = 33; 7110 params.derr = false; 7111 extract_info_from_fw = false; 7112 break; 7113 case GAUDI_EVENT_MME0_SBAB_DERR: 7114 case GAUDI_EVENT_MME1_SBAB_DERR: 7115 case GAUDI_EVENT_MME2_SBAB_DERR: 7116 case GAUDI_EVENT_MME3_SBAB_DERR: 7117 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; 7118 params.block_address = 7119 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7120 params.num_memories = 33; 7121 params.derr = true; 7122 extract_info_from_fw = false; 7123 break; 7124 default: 7125 return; 7126 } 7127 7128 extract_ecc_info: 7129 if (extract_info_from_fw) { 7130 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7131 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7132 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7133 } else { 7134 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address, 7135 &ecc_syndrom, &memory_wrapper_idx); 7136 if (rc) 7137 return; 7138 } 7139 7140 dev_err(hdev->dev, 7141 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n", 7142 ecc_address, ecc_syndrom, memory_wrapper_idx); 7143 } 7144 7145 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7146 { 7147 u64 qman_base; 7148 char desc[32]; 7149 u32 qid_base; 7150 u8 index; 7151 7152 switch (event_type) { 7153 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7154 index = event_type - GAUDI_EVENT_TPC0_QM; 7155 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7156 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; 7157 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); 7158 break; 7159 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7160 if (event_type == GAUDI_EVENT_MME0_QM) { 7161 index = 0; 7162 qid_base = GAUDI_QUEUE_ID_MME_0_0; 7163 } else { /* event_type == GAUDI_EVENT_MME2_QM */ 7164 index = 2; 7165 qid_base = GAUDI_QUEUE_ID_MME_1_0; 7166 } 7167 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET; 7168 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); 7169 break; 7170 case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: 7171 index = event_type - GAUDI_EVENT_DMA0_QM; 7172 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS; 7173 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */ 7174 if (index > 1) 7175 qid_base++; 7176 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET; 7177 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); 7178 break; 7179 case GAUDI_EVENT_NIC0_QM0: 7180 qid_base = GAUDI_QUEUE_ID_NIC_0_0; 7181 qman_base = mmNIC0_QM0_BASE; 7182 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0"); 7183 break; 7184 case GAUDI_EVENT_NIC0_QM1: 7185 qid_base = GAUDI_QUEUE_ID_NIC_1_0; 7186 qman_base = mmNIC0_QM1_BASE; 7187 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1"); 7188 break; 7189 case GAUDI_EVENT_NIC1_QM0: 7190 qid_base = GAUDI_QUEUE_ID_NIC_2_0; 7191 qman_base = mmNIC1_QM0_BASE; 7192 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0"); 7193 break; 7194 case GAUDI_EVENT_NIC1_QM1: 7195 qid_base = GAUDI_QUEUE_ID_NIC_3_0; 7196 qman_base = mmNIC1_QM1_BASE; 7197 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1"); 7198 break; 7199 case GAUDI_EVENT_NIC2_QM0: 7200 qid_base = GAUDI_QUEUE_ID_NIC_4_0; 7201 qman_base = mmNIC2_QM0_BASE; 7202 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0"); 7203 break; 7204 case GAUDI_EVENT_NIC2_QM1: 7205 qid_base = GAUDI_QUEUE_ID_NIC_5_0; 7206 qman_base = mmNIC2_QM1_BASE; 7207 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1"); 7208 break; 7209 case GAUDI_EVENT_NIC3_QM0: 7210 qid_base = GAUDI_QUEUE_ID_NIC_6_0; 7211 qman_base = mmNIC3_QM0_BASE; 7212 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0"); 7213 break; 7214 case GAUDI_EVENT_NIC3_QM1: 7215 qid_base = GAUDI_QUEUE_ID_NIC_7_0; 7216 qman_base = mmNIC3_QM1_BASE; 7217 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1"); 7218 break; 7219 case GAUDI_EVENT_NIC4_QM0: 7220 qid_base = GAUDI_QUEUE_ID_NIC_8_0; 7221 qman_base = mmNIC4_QM0_BASE; 7222 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0"); 7223 break; 7224 case GAUDI_EVENT_NIC4_QM1: 7225 qid_base = GAUDI_QUEUE_ID_NIC_9_0; 7226 qman_base = mmNIC4_QM1_BASE; 7227 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1"); 7228 break; 7229 default: 7230 return; 7231 } 7232 7233 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask); 7234 } 7235 7236 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, 7237 bool check_razwi, u64 *event_mask) 7238 { 7239 bool is_read = false, is_write = false; 7240 u16 engine_id[2], num_of_razwi_eng = 0; 7241 char desc[64] = ""; 7242 u64 razwi_addr = 0; 7243 u8 razwi_flags = 0; 7244 7245 /* 7246 * Init engine id by default as not valid and only if razwi initiated from engine with 7247 * engine id it will get valid value. 
7248 */ 7249 engine_id[0] = HL_RAZWI_NA_ENG_ID; 7250 engine_id[1] = HL_RAZWI_NA_ENG_ID; 7251 7252 gaudi_get_event_desc(event_type, desc, sizeof(desc)); 7253 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7254 event_type, desc); 7255 7256 if (check_razwi) { 7257 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, 7258 &is_write); 7259 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); 7260 7261 if (is_read) 7262 razwi_flags |= HL_RAZWI_READ; 7263 if (is_write) 7264 razwi_flags |= HL_RAZWI_WRITE; 7265 7266 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) { 7267 if (engine_id[1] != HL_RAZWI_NA_ENG_ID) 7268 num_of_razwi_eng = 2; 7269 else 7270 num_of_razwi_eng = 1; 7271 } 7272 7273 if (razwi_flags) 7274 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, 7275 razwi_flags, event_mask); 7276 } 7277 } 7278 7279 static void gaudi_print_out_of_sync_info(struct hl_device *hdev, 7280 struct cpucp_pkt_sync_err *sync_err) 7281 { 7282 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 7283 7284 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 7285 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 7286 } 7287 7288 static void gaudi_print_fw_alive_info(struct hl_device *hdev, 7289 struct hl_eq_fw_alive *fw_alive) 7290 { 7291 dev_err(hdev->dev, 7292 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", 7293 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical", 7294 le32_to_cpu(fw_alive->process_id), 7295 le32_to_cpu(fw_alive->thread_id), 7296 le64_to_cpu(fw_alive->uptime_seconds)); 7297 } 7298 7299 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, 7300 void *data) 7301 { 7302 char desc[64] = "", *type; 7303 struct eq_nic_sei_event *eq_nic_sei = data; 7304 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; 7305 7306 switch (eq_nic_sei->axi_error_cause) { 7307 case RXB: 7308 type = "RXB"; 7309 break; 7310 case RXE: 7311 type = "RXE"; 7312 break; 7313 case TXS: 7314 type = "TXS"; 7315 break; 7316 case TXE: 7317 type = "TXE"; 7318 break; 7319 case QPC_RESP: 7320 type = "QPC_RESP"; 7321 break; 7322 case NON_AXI_ERR: 7323 type = "NON_AXI_ERR"; 7324 break; 7325 case TMR: 7326 type = "TMR"; 7327 break; 7328 default: 7329 dev_err(hdev->dev, "unknown NIC AXI cause %d\n", 7330 eq_nic_sei->axi_error_cause); 7331 type = "N/A"; 7332 break; 7333 } 7334 7335 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, 7336 eq_nic_sei->id); 7337 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7338 event_type, desc); 7339 } 7340 7341 static int gaudi_compute_reset_late_init(struct hl_device *hdev) 7342 { 7343 /* GAUDI doesn't support any reset except hard-reset */ 7344 return -EPERM; 7345 } 7346 7347 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, 7348 struct hl_eq_hbm_ecc_data *hbm_ecc_data) 7349 { 7350 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7351 int rc = 0; 7352 7353 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 7354 CPU_BOOT_DEV_STS0_HBM_ECC_EN) { 7355 if (!hbm_ecc_data) { 7356 dev_err(hdev->dev, "No FW ECC data"); 7357 return 0; 7358 } 7359 7360 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK, 7361 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7362 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK, 7363 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7364 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK, 7365 
le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7366 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK, 7367 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7368 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK, 7369 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7370 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK, 7371 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7372 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK, 7373 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7374 7375 dev_err(hdev->dev, 7376 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7377 device, ch, wr_par, rd_par, ca_par, serr, derr); 7378 dev_err(hdev->dev, 7379 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7380 device, ch, hbm_ecc_data->first_addr, type, 7381 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7382 hbm_ecc_data->dec_cnt); 7383 return 0; 7384 } 7385 7386 if (hdev->asic_prop.fw_security_enabled) { 7387 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7388 return 0; 7389 } 7390 7391 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; 7392 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { 7393 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); 7394 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7395 if (val) { 7396 rc = -EIO; 7397 dev_err(hdev->dev, 7398 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7399 device, ch * 2, val & 0x1, (val >> 1) & 0x1, 7400 (val >> 2) & 0x1, (val >> 3) & 0x1, 7401 (val >> 4) & 0x1); 7402 7403 val2 = RREG32(base + ch * 0x1000 + 0x060); 7404 dev_err(hdev->dev, 7405 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7406 device, ch * 2, 7407 RREG32(base + ch * 0x1000 + 0x064), 7408 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7409 (val2 & 0xFF0000) >> 16, 7410 (val2 & 0xFF000000) >> 24); 7411 } 7412 7413 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); 7414 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7415 if (val) { 7416 rc = -EIO; 7417 dev_err(hdev->dev, 7418 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7419 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, 7420 (val >> 2) & 0x1, (val >> 3) & 0x1, 7421 (val >> 4) & 0x1); 7422 7423 val2 = RREG32(base + ch * 0x1000 + 0x070); 7424 dev_err(hdev->dev, 7425 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7426 device, ch * 2 + 1, 7427 RREG32(base + ch * 0x1000 + 0x074), 7428 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7429 (val2 & 0xFF0000) >> 16, 7430 (val2 & 0xFF000000) >> 24); 7431 } 7432 7433 /* Clear interrupts */ 7434 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); 7435 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); 7436 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); 7437 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); 7438 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); 7439 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); 7440 } 7441 7442 val = RREG32(base + 0x8F30); 7443 val2 = RREG32(base + 0x8F34); 7444 if (val | val2) { 7445 rc = -EIO; 7446 dev_err(hdev->dev, 7447 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", 7448 device, val, val2); 7449 } 7450 val = RREG32(base + 0x8F40); 7451 val2 = RREG32(base + 0x8F44); 7452 if (val | val2) { 7453 rc = -EIO; 7454 dev_err(hdev->dev, 7455 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", 7456 device, val, val2); 7457 } 7458 7459 return rc; 7460 } 
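/*
 * Illustrative sketch only - the struct and helper names below are
 * hypothetical and nothing in the driver calls them. They show how the
 * per-pseudo-channel interrupt word read in gaudi_hbm_read_interrupts()
 * above is decoded: the low byte and the high byte carry the same layout,
 * so they are folded together before the single-bit fields are extracted,
 * exactly as done in the RREG32-based path above.
 */
struct gaudi_hbm_irq_info_sketch {
	u8 wr_par, rd_par, ca_par, serr, derr;
};

static inline void gaudi_hbm_decode_irq_word_sketch(u32 val,
					struct gaudi_hbm_irq_info_sketch *info)
{
	/* fold the two identically laid-out bytes into one */
	val = (val & 0xFF) | ((val >> 8) & 0xFF);

	info->wr_par = val & 0x1;		/* bit 0 - WR_PAR */
	info->rd_par = (val >> 1) & 0x1;	/* bit 1 - RD_PAR */
	info->ca_par = (val >> 2) & 0x1;	/* bit 2 - CA_PAR */
	info->serr = (val >> 3) & 0x1;		/* bit 3 - SERR */
	info->derr = (val >> 4) & 0x1;		/* bit 4 - DERR */
}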

static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}

static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
					char *interrupt_name)
{
	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
	bool soft_reset_required = false;

	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (tpc_interrupts_cause & BIT(i)) {
			dev_err_ratelimited(hdev->dev,
					"TPC%d_%s interrupt cause: %s\n",
					tpc_id, interrupt_name,
					gaudi_tpc_interrupts_cause[i]);
			/* If this is a QM error, we need to soft-reset */
			if (i == 15)
				soft_reset_required = true;
		}

	/* Clear interrupts */
	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

	return soft_reset_required;
}

static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}

static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}

static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
dev_err(hdev->dev, "Received invalid clock change event %d\n", 7564 event_type); 7565 break; 7566 } 7567 7568 mutex_unlock(&hdev->clk_throttling.lock); 7569 } 7570 7571 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 7572 { 7573 struct gaudi_device *gaudi = hdev->asic_specific; 7574 struct hl_info_fw_err_info fw_err_info; 7575 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0; 7576 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); 7577 u32 fw_fatal_err_flag = 0, flags = 0; 7578 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) 7579 >> EQ_CTL_EVENT_TYPE_SHIFT); 7580 bool reset_required, reset_direct = false; 7581 u8 cause; 7582 int rc; 7583 7584 if (event_type >= GAUDI_EVENT_SIZE) { 7585 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 7586 event_type, GAUDI_EVENT_SIZE - 1); 7587 return; 7588 } 7589 7590 gaudi->events_stat[event_type]++; 7591 gaudi->events_stat_aggregate[event_type]++; 7592 7593 switch (event_type) { 7594 case GAUDI_EVENT_PCIE_CORE_DERR: 7595 case GAUDI_EVENT_PCIE_IF_DERR: 7596 case GAUDI_EVENT_PCIE_PHY_DERR: 7597 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: 7598 case GAUDI_EVENT_MME0_ACC_DERR: 7599 case GAUDI_EVENT_MME0_SBAB_DERR: 7600 case GAUDI_EVENT_MME1_ACC_DERR: 7601 case GAUDI_EVENT_MME1_SBAB_DERR: 7602 case GAUDI_EVENT_MME2_ACC_DERR: 7603 case GAUDI_EVENT_MME2_SBAB_DERR: 7604 case GAUDI_EVENT_MME3_ACC_DERR: 7605 case GAUDI_EVENT_MME3_SBAB_DERR: 7606 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: 7607 fallthrough; 7608 case GAUDI_EVENT_CPU_IF_ECC_DERR: 7609 case GAUDI_EVENT_PSOC_MEM_DERR: 7610 case GAUDI_EVENT_PSOC_CORESIGHT_DERR: 7611 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: 7612 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: 7613 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: 7614 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: 7615 case GAUDI_EVENT_MMU_DERR: 7616 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7617 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7618 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7619 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7620 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7621 goto reset_device; 7622 7623 case GAUDI_EVENT_GIC500: 7624 case GAUDI_EVENT_AXI_ECC: 7625 case GAUDI_EVENT_L2_RAM_ECC: 7626 case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: 7627 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7628 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7629 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7630 goto reset_device; 7631 7632 case GAUDI_EVENT_HBM0_SPI_0: 7633 case GAUDI_EVENT_HBM1_SPI_0: 7634 case GAUDI_EVENT_HBM2_SPI_0: 7635 case GAUDI_EVENT_HBM3_SPI_0: 7636 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7637 gaudi_hbm_read_interrupts(hdev, 7638 gaudi_hbm_event_to_dev(event_type), 7639 &eq_entry->hbm_ecc_data); 7640 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7641 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7642 goto reset_device; 7643 7644 case GAUDI_EVENT_HBM0_SPI_1: 7645 case GAUDI_EVENT_HBM1_SPI_1: 7646 case GAUDI_EVENT_HBM2_SPI_1: 7647 case GAUDI_EVENT_HBM3_SPI_1: 7648 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7649 gaudi_hbm_read_interrupts(hdev, 7650 gaudi_hbm_event_to_dev(event_type), 7651 &eq_entry->hbm_ecc_data); 7652 hl_fw_unmask_irq(hdev, event_type); 7653 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7654 break; 7655 7656 case GAUDI_EVENT_TPC0_DEC: 7657 case GAUDI_EVENT_TPC1_DEC: 7658 case GAUDI_EVENT_TPC2_DEC: 7659 case GAUDI_EVENT_TPC3_DEC: 7660 case GAUDI_EVENT_TPC4_DEC: 7661 case GAUDI_EVENT_TPC5_DEC: 7662 case GAUDI_EVENT_TPC6_DEC: 7663 case GAUDI_EVENT_TPC7_DEC: 7664 /* In TPC DEC event, notify on TPC assertion. While there isn't 7665 * a specific event for assertion yet, the FW generates TPC DEC event. 7666 * The SW upper layer will inspect an internal mapped area to indicate 7667 * if the event is a TPC Assertion or a "real" TPC DEC. 7668 */ 7669 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7670 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7671 reset_required = gaudi_tpc_read_interrupts(hdev, 7672 tpc_dec_event_to_tpc_id(event_type), 7673 "AXI_SLV_DEC_Error"); 7674 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7675 if (reset_required) { 7676 dev_err(hdev->dev, "reset required due to %s\n", 7677 gaudi_irq_map_table[event_type].name); 7678 7679 reset_direct = true; 7680 goto reset_device; 7681 } else { 7682 hl_fw_unmask_irq(hdev, event_type); 7683 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7684 } 7685 break; 7686 7687 case GAUDI_EVENT_TPC0_KRN_ERR: 7688 case GAUDI_EVENT_TPC1_KRN_ERR: 7689 case GAUDI_EVENT_TPC2_KRN_ERR: 7690 case GAUDI_EVENT_TPC3_KRN_ERR: 7691 case GAUDI_EVENT_TPC4_KRN_ERR: 7692 case GAUDI_EVENT_TPC5_KRN_ERR: 7693 case GAUDI_EVENT_TPC6_KRN_ERR: 7694 case GAUDI_EVENT_TPC7_KRN_ERR: 7695 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7696 reset_required = gaudi_tpc_read_interrupts(hdev, 7697 tpc_krn_event_to_tpc_id(event_type), 7698 "KRN_ERR"); 7699 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7700 if (reset_required) { 7701 dev_err(hdev->dev, "reset required due to %s\n", 7702 gaudi_irq_map_table[event_type].name); 7703 7704 reset_direct = true; 7705 goto reset_device; 7706 } else { 7707 hl_fw_unmask_irq(hdev, event_type); 7708 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7709 } 7710 break; 7711 7712 case GAUDI_EVENT_PCIE_CORE_SERR: 7713 case GAUDI_EVENT_PCIE_IF_SERR: 7714 case GAUDI_EVENT_PCIE_PHY_SERR: 7715 case GAUDI_EVENT_TPC0_SERR ... 
GAUDI_EVENT_TPC7_SERR: 7716 case GAUDI_EVENT_MME0_ACC_SERR: 7717 case GAUDI_EVENT_MME0_SBAB_SERR: 7718 case GAUDI_EVENT_MME1_ACC_SERR: 7719 case GAUDI_EVENT_MME1_SBAB_SERR: 7720 case GAUDI_EVENT_MME2_ACC_SERR: 7721 case GAUDI_EVENT_MME2_SBAB_SERR: 7722 case GAUDI_EVENT_MME3_ACC_SERR: 7723 case GAUDI_EVENT_MME3_SBAB_SERR: 7724 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: 7725 case GAUDI_EVENT_CPU_IF_ECC_SERR: 7726 case GAUDI_EVENT_PSOC_MEM_SERR: 7727 case GAUDI_EVENT_PSOC_CORESIGHT_SERR: 7728 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: 7729 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: 7730 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: 7731 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: 7732 fallthrough; 7733 case GAUDI_EVENT_MMU_SERR: 7734 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7735 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7736 hl_fw_unmask_irq(hdev, event_type); 7737 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7738 break; 7739 7740 case GAUDI_EVENT_PCIE_DEC: 7741 case GAUDI_EVENT_CPU_AXI_SPLITTER: 7742 case GAUDI_EVENT_PSOC_AXI_DEC: 7743 case GAUDI_EVENT_PSOC_PRSTN_FALL: 7744 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7745 hl_fw_unmask_irq(hdev, event_type); 7746 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7747 break; 7748 7749 case GAUDI_EVENT_MMU_PAGE_FAULT: 7750 case GAUDI_EVENT_MMU_WR_PERM: 7751 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7752 hl_fw_unmask_irq(hdev, event_type); 7753 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7754 break; 7755 7756 case GAUDI_EVENT_MME0_WBC_RSP: 7757 case GAUDI_EVENT_MME0_SBAB0_RSP: 7758 case GAUDI_EVENT_MME1_WBC_RSP: 7759 case GAUDI_EVENT_MME1_SBAB0_RSP: 7760 case GAUDI_EVENT_MME2_WBC_RSP: 7761 case GAUDI_EVENT_MME2_SBAB0_RSP: 7762 case GAUDI_EVENT_MME3_WBC_RSP: 7763 case GAUDI_EVENT_MME3_SBAB0_RSP: 7764 case GAUDI_EVENT_RAZWI_OR_ADC: 7765 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7766 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: 7767 fallthrough; 7768 case GAUDI_EVENT_NIC0_QM0: 7769 case GAUDI_EVENT_NIC0_QM1: 7770 case GAUDI_EVENT_NIC1_QM0: 7771 case GAUDI_EVENT_NIC1_QM1: 7772 case GAUDI_EVENT_NIC2_QM0: 7773 case GAUDI_EVENT_NIC2_QM1: 7774 case GAUDI_EVENT_NIC3_QM0: 7775 case GAUDI_EVENT_NIC3_QM1: 7776 case GAUDI_EVENT_NIC4_QM0: 7777 case GAUDI_EVENT_NIC4_QM1: 7778 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: 7779 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7780 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7781 gaudi_handle_qman_err(hdev, event_type, &event_mask); 7782 hl_fw_unmask_irq(hdev, event_type); 7783 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); 7784 break; 7785 7786 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7787 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7788 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7789 goto reset_device; 7790 7791 case GAUDI_EVENT_TPC0_BMON_SPMU: 7792 case GAUDI_EVENT_TPC1_BMON_SPMU: 7793 case GAUDI_EVENT_TPC2_BMON_SPMU: 7794 case GAUDI_EVENT_TPC3_BMON_SPMU: 7795 case GAUDI_EVENT_TPC4_BMON_SPMU: 7796 case GAUDI_EVENT_TPC5_BMON_SPMU: 7797 case GAUDI_EVENT_TPC6_BMON_SPMU: 7798 case GAUDI_EVENT_TPC7_BMON_SPMU: 7799 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7800 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7801 hl_fw_unmask_irq(hdev, event_type); 7802 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7803 break; 7804 7805 case GAUDI_EVENT_NIC_SEI_0 ... 
GAUDI_EVENT_NIC_SEI_4: 7806 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7807 hl_fw_unmask_irq(hdev, event_type); 7808 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7809 break; 7810 7811 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: 7812 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7813 gaudi_print_sm_sei_info(hdev, event_type, 7814 &eq_entry->sm_sei_data); 7815 rc = hl_state_dump(hdev); 7816 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7817 if (rc) 7818 dev_err(hdev->dev, 7819 "Error during system state dump %d\n", rc); 7820 hl_fw_unmask_irq(hdev, event_type); 7821 break; 7822 7823 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: 7824 break; 7825 7826 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: 7827 gaudi_print_clk_change_info(hdev, event_type, &event_mask); 7828 hl_fw_unmask_irq(hdev, event_type); 7829 break; 7830 7831 case GAUDI_EVENT_PSOC_GPIO_U16_0: 7832 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; 7833 dev_err(hdev->dev, 7834 "Received high temp H/W interrupt %d (cause %d)\n", 7835 event_type, cause); 7836 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7837 break; 7838 7839 case GAUDI_EVENT_DEV_RESET_REQ: 7840 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7841 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7842 goto reset_device; 7843 7844 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7845 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7846 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7847 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7848 goto reset_device; 7849 7850 case GAUDI_EVENT_FW_ALIVE_S: 7851 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7852 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7853 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR; 7854 fw_err_info.event_id = event_type; 7855 fw_err_info.event_mask = &event_mask; 7856 hl_handle_fw_err(hdev, &fw_err_info); 7857 goto reset_device; 7858 7859 default: 7860 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", 7861 event_type); 7862 break; 7863 } 7864 7865 if (event_mask) 7866 hl_notifier_event_send_all(hdev, event_mask); 7867 7868 return; 7869 7870 reset_device: 7871 reset_required = true; 7872 7873 if (hdev->asic_prop.fw_security_enabled && !reset_direct) { 7874 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag; 7875 7876 /* notify on device unavailable while the reset triggered by fw */ 7877 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | 7878 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); 7879 } else if (hdev->hard_reset_on_fw_events) { 7880 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag; 7881 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7882 } else { 7883 reset_required = false; 7884 } 7885 7886 if (reset_required) { 7887 /* escalate general hw errors to critical/fatal error */ 7888 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) 7889 hl_handle_critical_hw_err(hdev, event_type, &event_mask); 7890 7891 hl_device_cond_reset(hdev, flags, event_mask); 7892 } else { 7893 hl_fw_unmask_irq(hdev, event_type); 7894 /* Notification on occurred event needs to be sent although reset is not executed */ 7895 if (event_mask) 7896 hl_notifier_event_send_all(hdev, event_mask); 7897 } 7898 } 7899 7900 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7901 { 7902 struct gaudi_device *gaudi = hdev->asic_specific; 7903 7904 if (aggregate) { 7905 *size = (u32) sizeof(gaudi->events_stat_aggregate); 7906 
return gaudi->events_stat_aggregate; 7907 } 7908 7909 *size = (u32) sizeof(gaudi->events_stat); 7910 return gaudi->events_stat; 7911 } 7912 7913 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 7914 { 7915 struct gaudi_device *gaudi = hdev->asic_specific; 7916 u32 status, timeout_usec; 7917 int rc; 7918 7919 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || 7920 hdev->reset_info.hard_reset_pending) 7921 return 0; 7922 7923 if (hdev->pldm) 7924 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7925 else 7926 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7927 7928 /* L0 & L1 invalidation */ 7929 WREG32(mmSTLB_INV_PS, 3); 7930 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); 7931 WREG32(mmSTLB_INV_PS, 2); 7932 7933 rc = hl_poll_timeout( 7934 hdev, 7935 mmSTLB_INV_PS, 7936 status, 7937 !status, 7938 1000, 7939 timeout_usec); 7940 7941 WREG32(mmSTLB_INV_SET, 0); 7942 7943 return rc; 7944 } 7945 7946 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, 7947 bool is_hard, u32 flags, 7948 u32 asid, u64 va, u64 size) 7949 { 7950 /* Treat as invalidate all because there is no range invalidation 7951 * in Gaudi 7952 */ 7953 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 7954 } 7955 7956 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr) 7957 { 7958 u32 status, timeout_usec; 7959 int rc; 7960 7961 if (hdev->pldm) 7962 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7963 else 7964 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7965 7966 WREG32(MMU_ASID, asid); 7967 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 7968 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); 7969 WREG32(MMU_BUSY, 0x80000000); 7970 7971 rc = hl_poll_timeout( 7972 hdev, 7973 MMU_BUSY, 7974 status, 7975 !(status & 0x80000000), 7976 1000, 7977 timeout_usec); 7978 7979 if (rc) { 7980 dev_err(hdev->dev, 7981 "Timeout during MMU hop0 config of asid %d\n", asid); 7982 return rc; 7983 } 7984 7985 return 0; 7986 } 7987 7988 static int gaudi_send_heartbeat(struct hl_device *hdev) 7989 { 7990 struct gaudi_device *gaudi = hdev->asic_specific; 7991 7992 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 7993 return 0; 7994 7995 return hl_fw_send_heartbeat(hdev); 7996 } 7997 7998 static int gaudi_cpucp_info_get(struct hl_device *hdev) 7999 { 8000 struct gaudi_device *gaudi = hdev->asic_specific; 8001 struct asic_fixed_properties *prop = &hdev->asic_prop; 8002 int rc; 8003 8004 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8005 return 0; 8006 8007 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, 8008 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 8009 mmCPU_BOOT_ERR1); 8010 if (rc) 8011 return rc; 8012 8013 if (!strlen(prop->cpucp_info.card_name)) 8014 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 8015 CARD_NAME_MAX_LEN); 8016 8017 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); 8018 8019 set_default_power_values(hdev); 8020 8021 return 0; 8022 } 8023 8024 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 8025 struct engines_data *e) 8026 { 8027 struct gaudi_device *gaudi = hdev->asic_specific; 8028 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; 8029 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; 8030 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n"; 8031 unsigned long *mask = (unsigned long *)mask_arr; 8032 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; 8033 bool is_idle = true, is_eng_idle, is_slave; 8034 u64 offset; 8035 int 
i, dma_id, port; 8036 8037 if (e) 8038 hl_engine_data_sprintf(e, 8039 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8040 "--- ------- ------------ ---------- -------------\n"); 8041 8042 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { 8043 dma_id = gaudi_dma_assignment[i]; 8044 offset = dma_id * DMA_QMAN_OFFSET; 8045 8046 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); 8047 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); 8048 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); 8049 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8050 IS_DMA_IDLE(dma_core_sts0); 8051 is_idle &= is_eng_idle; 8052 8053 if (mask && !is_eng_idle) 8054 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8055 if (e) 8056 hl_engine_data_sprintf(e, fmt, dma_id, 8057 is_eng_idle ? "Y" : "N", qm_glbl_sts0, 8058 qm_cgm_sts, dma_core_sts0); 8059 } 8060 8061 if (e) 8062 hl_engine_data_sprintf(e, 8063 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8064 "--- ------- ------------ ---------- ----------\n"); 8065 8066 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 8067 offset = i * TPC_QMAN_OFFSET; 8068 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); 8069 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); 8070 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); 8071 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8072 IS_TPC_IDLE(tpc_cfg_sts); 8073 is_idle &= is_eng_idle; 8074 8075 if (mask && !is_eng_idle) 8076 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8077 if (e) 8078 hl_engine_data_sprintf(e, fmt, i, 8079 is_eng_idle ? "Y" : "N", 8080 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8081 } 8082 8083 if (e) 8084 hl_engine_data_sprintf(e, 8085 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8086 "--- ------- ------------ ---------- -----------\n"); 8087 8088 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { 8089 offset = i * MME_QMAN_OFFSET; 8090 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); 8091 is_eng_idle = IS_MME_IDLE(mme_arch_sts); 8092 8093 /* MME 1 & 3 are slaves, no need to check their QMANs */ 8094 is_slave = i % 2; 8095 if (!is_slave) { 8096 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); 8097 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); 8098 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8099 } 8100 8101 is_idle &= is_eng_idle; 8102 8103 if (mask && !is_eng_idle) 8104 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8105 if (e) { 8106 if (!is_slave) 8107 hl_engine_data_sprintf(e, fmt, i, 8108 is_eng_idle ? "Y" : "N", 8109 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8110 else 8111 hl_engine_data_sprintf(e, mme_slave_fmt, i, 8112 is_eng_idle ? "Y" : "N", "-", 8113 "-", mme_arch_sts); 8114 } 8115 } 8116 8117 if (e) 8118 hl_engine_data_sprintf(e, 8119 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8120 "--- ------- ------------ ----------\n"); 8121 8122 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { 8123 offset = i * NIC_MACRO_QMAN_OFFSET; 8124 port = 2 * i; 8125 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8126 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 8127 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 8128 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8129 is_idle &= is_eng_idle; 8130 8131 if (mask && !is_eng_idle) 8132 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8133 if (e) 8134 hl_engine_data_sprintf(e, nic_fmt, port, 8135 is_eng_idle ? 
"Y" : "N", 8136 qm_glbl_sts0, qm_cgm_sts); 8137 } 8138 8139 port = 2 * i + 1; 8140 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8141 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset); 8142 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset); 8143 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8144 is_idle &= is_eng_idle; 8145 8146 if (mask && !is_eng_idle) 8147 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8148 if (e) 8149 hl_engine_data_sprintf(e, nic_fmt, port, 8150 is_eng_idle ? "Y" : "N", 8151 qm_glbl_sts0, qm_cgm_sts); 8152 } 8153 } 8154 8155 if (e) 8156 hl_engine_data_sprintf(e, "\n"); 8157 8158 return is_idle; 8159 } 8160 8161 static void gaudi_hw_queues_lock(struct hl_device *hdev) 8162 __acquires(&gaudi->hw_queues_lock) 8163 { 8164 struct gaudi_device *gaudi = hdev->asic_specific; 8165 8166 spin_lock(&gaudi->hw_queues_lock); 8167 } 8168 8169 static void gaudi_hw_queues_unlock(struct hl_device *hdev) 8170 __releases(&gaudi->hw_queues_lock) 8171 { 8172 struct gaudi_device *gaudi = hdev->asic_specific; 8173 8174 spin_unlock(&gaudi->hw_queues_lock); 8175 } 8176 8177 static u32 gaudi_get_pci_id(struct hl_device *hdev) 8178 { 8179 return hdev->pdev->device; 8180 } 8181 8182 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, 8183 size_t max_size) 8184 { 8185 struct gaudi_device *gaudi = hdev->asic_specific; 8186 8187 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8188 return 0; 8189 8190 return hl_fw_get_eeprom_data(hdev, data, max_size); 8191 } 8192 8193 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) 8194 { 8195 struct gaudi_device *gaudi = hdev->asic_specific; 8196 8197 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8198 return 0; 8199 8200 return hl_fw_get_monitor_dump(hdev, data); 8201 } 8202 8203 /* 8204 * this function should be used only during initialization and/or after reset, 8205 * when there are no active users. 
8206 */ 8207 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) 8208 { 8209 u64 kernel_timeout; 8210 u32 status, offset; 8211 int rc; 8212 8213 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); 8214 8215 if (hdev->pldm) 8216 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; 8217 else 8218 kernel_timeout = HL_DEVICE_TIMEOUT_USEC; 8219 8220 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, 8221 lower_32_bits(tpc_kernel)); 8222 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, 8223 upper_32_bits(tpc_kernel)); 8224 8225 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, 8226 lower_32_bits(tpc_kernel)); 8227 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, 8228 upper_32_bits(tpc_kernel)); 8229 /* set a valid LUT pointer, content is of no significance */ 8230 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, 8231 lower_32_bits(tpc_kernel)); 8232 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, 8233 upper_32_bits(tpc_kernel)); 8234 8235 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, 8236 lower_32_bits(CFG_BASE + 8237 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); 8238 8239 WREG32(mmTPC0_CFG_TPC_CMD + offset, 8240 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | 8241 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); 8242 /* wait a bit for the engine to start executing */ 8243 usleep_range(1000, 1500); 8244 8245 /* wait until engine has finished executing */ 8246 rc = hl_poll_timeout( 8247 hdev, 8248 mmTPC0_CFG_STATUS + offset, 8249 status, 8250 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8251 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8252 1000, 8253 kernel_timeout); 8254 8255 if (rc) { 8256 dev_err(hdev->dev, 8257 "Timeout while waiting for TPC%d icache prefetch\n", 8258 tpc_id); 8259 return -EIO; 8260 } 8261 8262 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, 8263 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); 8264 8265 /* wait a bit for the engine to start executing */ 8266 usleep_range(1000, 1500); 8267 8268 /* wait until engine has finished executing */ 8269 rc = hl_poll_timeout( 8270 hdev, 8271 mmTPC0_CFG_STATUS + offset, 8272 status, 8273 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8274 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8275 1000, 8276 kernel_timeout); 8277 8278 if (rc) { 8279 dev_err(hdev->dev, 8280 "Timeout while waiting for TPC%d vector pipe\n", 8281 tpc_id); 8282 return -EIO; 8283 } 8284 8285 rc = hl_poll_timeout( 8286 hdev, 8287 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, 8288 status, 8289 (status == 0), 8290 1000, 8291 kernel_timeout); 8292 8293 if (rc) { 8294 dev_err(hdev->dev, 8295 "Timeout while waiting for TPC%d kernel to execute\n", 8296 tpc_id); 8297 return -EIO; 8298 } 8299 8300 return 0; 8301 } 8302 8303 static int gaudi_internal_cb_pool_init(struct hl_device *hdev, 8304 struct hl_ctx *ctx) 8305 { 8306 struct gaudi_device *gaudi = hdev->asic_specific; 8307 int min_alloc_order, rc, collective_cb_size; 8308 8309 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8310 return 0; 8311 8312 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 8313 HOST_SPACE_INTERNAL_CB_SZ, 8314 &hdev->internal_cb_pool_dma_addr, 8315 GFP_KERNEL | __GFP_ZERO); 8316 8317 if (!hdev->internal_cb_pool_virt_addr) 8318 return -ENOMEM; 8319 8320 collective_cb_size = sizeof(struct packet_msg_short) * 5 + 8321 sizeof(struct packet_fence); 8322 min_alloc_order = ilog2(collective_cb_size); 8323 8324 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 8325 if (!hdev->internal_cb_pool) { 8326 dev_err(hdev->dev, 8327 "Failed to 
create internal CB pool\n"); 8328 rc = -ENOMEM; 8329 goto free_internal_cb_pool; 8330 } 8331 8332 rc = gen_pool_add(hdev->internal_cb_pool, 8333 (uintptr_t) hdev->internal_cb_pool_virt_addr, 8334 HOST_SPACE_INTERNAL_CB_SZ, -1); 8335 if (rc) { 8336 dev_err(hdev->dev, 8337 "Failed to add memory to internal CB pool\n"); 8338 rc = -EFAULT; 8339 goto destroy_internal_cb_pool; 8340 } 8341 8342 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, 8343 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ, 8344 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 8345 8346 if (!hdev->internal_cb_va_base) { 8347 rc = -ENOMEM; 8348 goto destroy_internal_cb_pool; 8349 } 8350 8351 mutex_lock(&hdev->mmu_lock); 8352 8353 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8354 hdev->internal_cb_pool_dma_addr, 8355 HOST_SPACE_INTERNAL_CB_SZ); 8356 if (rc) 8357 goto unreserve_internal_cb_pool; 8358 8359 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8360 if (rc) 8361 goto unmap_internal_cb_pool; 8362 8363 mutex_unlock(&hdev->mmu_lock); 8364 8365 return 0; 8366 8367 unmap_internal_cb_pool: 8368 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8369 HOST_SPACE_INTERNAL_CB_SZ); 8370 unreserve_internal_cb_pool: 8371 mutex_unlock(&hdev->mmu_lock); 8372 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8373 HOST_SPACE_INTERNAL_CB_SZ); 8374 destroy_internal_cb_pool: 8375 gen_pool_destroy(hdev->internal_cb_pool); 8376 free_internal_cb_pool: 8377 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8378 hdev->internal_cb_pool_dma_addr); 8379 8380 return rc; 8381 } 8382 8383 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, 8384 struct hl_ctx *ctx) 8385 { 8386 struct gaudi_device *gaudi = hdev->asic_specific; 8387 8388 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8389 return; 8390 8391 mutex_lock(&hdev->mmu_lock); 8392 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8393 HOST_SPACE_INTERNAL_CB_SZ); 8394 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8395 HOST_SPACE_INTERNAL_CB_SZ); 8396 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8397 mutex_unlock(&hdev->mmu_lock); 8398 8399 gen_pool_destroy(hdev->internal_cb_pool); 8400 8401 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8402 hdev->internal_cb_pool_dma_addr); 8403 } 8404 8405 static int gaudi_ctx_init(struct hl_ctx *ctx) 8406 { 8407 int rc; 8408 8409 if (ctx->asid == HL_KERNEL_ASID_ID) 8410 return 0; 8411 8412 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); 8413 if (rc) 8414 return rc; 8415 8416 rc = gaudi_restore_user_registers(ctx->hdev); 8417 if (rc) 8418 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8419 8420 return rc; 8421 } 8422 8423 static void gaudi_ctx_fini(struct hl_ctx *ctx) 8424 { 8425 if (ctx->asid == HL_KERNEL_ASID_ID) 8426 return; 8427 8428 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8429 } 8430 8431 static int gaudi_pre_schedule_cs(struct hl_cs *cs) 8432 { 8433 return 0; 8434 } 8435 8436 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 8437 { 8438 return gaudi_cq_assignment[cq_idx]; 8439 } 8440 8441 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) 8442 { 8443 return sizeof(struct packet_msg_short) + 8444 sizeof(struct packet_msg_prot) * 2; 8445 } 8446 8447 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) 8448 { 8449 return sizeof(struct packet_msg_short) * 4 + 8450 sizeof(struct packet_fence) + 8451 sizeof(struct packet_msg_prot) * 2; 8452 } 8453 8454 
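/*
 * Orientation sketch only (hypothetical helper, not called by the driver):
 * a breakdown of the size returned by gaudi_get_wait_cb_size() above.
 * gaudi_gen_wait_cb() below emits three MSG_SHORT packets that configure the
 * monitor (payload address low/high and payload data), one MSG_SHORT that
 * arms the monitor, and a FENCE packet. The two MSG_PROT packets are assumed
 * here to be reserved for the completion signaling that the common submission
 * path appends to the CB.
 */
static inline u32 gaudi_wait_cb_size_breakdown_sketch(void)
{
	u32 mon_cfg_size = 3 * sizeof(struct packet_msg_short);	/* ADDRL, ADDRH, DATA */
	u32 mon_arm_size = sizeof(struct packet_msg_short);		/* MON_ARM */
	u32 fence_size = sizeof(struct packet_fence);
	u32 prot_size = 2 * sizeof(struct packet_msg_prot);		/* assumed completion */

	/* matches gaudi_get_wait_cb_size(): 4 * msg_short + fence + 2 * msg_prot */
	return mon_cfg_size + mon_arm_size + fence_size + prot_size;
}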
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) 8455 { 8456 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); 8457 } 8458 8459 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 8460 u32 size, bool eb) 8461 { 8462 struct hl_cb *cb = (struct hl_cb *) data; 8463 struct packet_msg_short *pkt; 8464 u32 value, ctl, pkt_size = sizeof(*pkt); 8465 8466 pkt = cb->kernel_address + size; 8467 memset(pkt, 0, pkt_size); 8468 8469 /* Inc by 1, Mode ADD */ 8470 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 8471 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 8472 8473 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 8474 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8475 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ 8476 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8477 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb); 8478 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8479 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8480 8481 pkt->value = cpu_to_le32(value); 8482 pkt->ctl = cpu_to_le32(ctl); 8483 8484 return size + pkt_size; 8485 } 8486 8487 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, 8488 u16 addr) 8489 { 8490 u32 ctl, pkt_size = sizeof(*pkt); 8491 8492 memset(pkt, 0, pkt_size); 8493 8494 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); 8495 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8496 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8497 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8498 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8499 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */ 8500 8501 pkt->value = cpu_to_le32(value); 8502 pkt->ctl = cpu_to_le32(ctl); 8503 8504 return pkt_size; 8505 } 8506 8507 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev, 8508 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask, 8509 u16 sob_val, u16 mon_id) 8510 { 8511 u64 monitor_base; 8512 u32 ctl, value, pkt_size = sizeof(*pkt); 8513 u16 msg_addr_offset; 8514 u8 mask; 8515 8516 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 8517 dev_err(hdev->dev, 8518 "sob_base %u (mask %#x) is not valid\n", 8519 sob_base, sob_mask); 8520 return 0; 8521 } 8522 8523 /* 8524 * monitor_base should be the content of the base0 address registers, 8525 * so it will be added to the msg short offsets 8526 */ 8527 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8528 8529 msg_addr_offset = 8530 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - 8531 monitor_base; 8532 8533 memset(pkt, 0, pkt_size); 8534 8535 /* Monitor config packet: bind the monitor to a sync object */ 8536 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 8537 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 8538 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, 8539 0); /* GREATER OR EQUAL*/ 8540 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); 8541 8542 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset); 8543 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8544 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8545 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8546 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8547 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8548 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8549 8550 pkt->value = 
cpu_to_le32(value); 8551 pkt->ctl = cpu_to_le32(ctl); 8552 8553 return pkt_size; 8554 } 8555 8556 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) 8557 { 8558 u32 ctl, cfg, pkt_size = sizeof(*pkt); 8559 8560 memset(pkt, 0, pkt_size); 8561 8562 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 8563 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 8564 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2); 8565 8566 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 8567 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8568 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8569 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8570 8571 pkt->cfg = cpu_to_le32(cfg); 8572 pkt->ctl = cpu_to_le32(ctl); 8573 8574 return pkt_size; 8575 } 8576 8577 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr) 8578 { 8579 u32 offset, nic_index; 8580 8581 switch (queue_id) { 8582 case GAUDI_QUEUE_ID_DMA_0_0: 8583 offset = mmDMA0_QM_CP_FENCE2_RDATA_0; 8584 break; 8585 case GAUDI_QUEUE_ID_DMA_0_1: 8586 offset = mmDMA0_QM_CP_FENCE2_RDATA_1; 8587 break; 8588 case GAUDI_QUEUE_ID_DMA_0_2: 8589 offset = mmDMA0_QM_CP_FENCE2_RDATA_2; 8590 break; 8591 case GAUDI_QUEUE_ID_DMA_0_3: 8592 offset = mmDMA0_QM_CP_FENCE2_RDATA_3; 8593 break; 8594 case GAUDI_QUEUE_ID_DMA_1_0: 8595 offset = mmDMA1_QM_CP_FENCE2_RDATA_0; 8596 break; 8597 case GAUDI_QUEUE_ID_DMA_1_1: 8598 offset = mmDMA1_QM_CP_FENCE2_RDATA_1; 8599 break; 8600 case GAUDI_QUEUE_ID_DMA_1_2: 8601 offset = mmDMA1_QM_CP_FENCE2_RDATA_2; 8602 break; 8603 case GAUDI_QUEUE_ID_DMA_1_3: 8604 offset = mmDMA1_QM_CP_FENCE2_RDATA_3; 8605 break; 8606 case GAUDI_QUEUE_ID_DMA_5_0: 8607 offset = mmDMA5_QM_CP_FENCE2_RDATA_0; 8608 break; 8609 case GAUDI_QUEUE_ID_DMA_5_1: 8610 offset = mmDMA5_QM_CP_FENCE2_RDATA_1; 8611 break; 8612 case GAUDI_QUEUE_ID_DMA_5_2: 8613 offset = mmDMA5_QM_CP_FENCE2_RDATA_2; 8614 break; 8615 case GAUDI_QUEUE_ID_DMA_5_3: 8616 offset = mmDMA5_QM_CP_FENCE2_RDATA_3; 8617 break; 8618 case GAUDI_QUEUE_ID_TPC_7_0: 8619 offset = mmTPC7_QM_CP_FENCE2_RDATA_0; 8620 break; 8621 case GAUDI_QUEUE_ID_TPC_7_1: 8622 offset = mmTPC7_QM_CP_FENCE2_RDATA_1; 8623 break; 8624 case GAUDI_QUEUE_ID_TPC_7_2: 8625 offset = mmTPC7_QM_CP_FENCE2_RDATA_2; 8626 break; 8627 case GAUDI_QUEUE_ID_TPC_7_3: 8628 offset = mmTPC7_QM_CP_FENCE2_RDATA_3; 8629 break; 8630 case GAUDI_QUEUE_ID_NIC_0_0: 8631 case GAUDI_QUEUE_ID_NIC_1_0: 8632 case GAUDI_QUEUE_ID_NIC_2_0: 8633 case GAUDI_QUEUE_ID_NIC_3_0: 8634 case GAUDI_QUEUE_ID_NIC_4_0: 8635 case GAUDI_QUEUE_ID_NIC_5_0: 8636 case GAUDI_QUEUE_ID_NIC_6_0: 8637 case GAUDI_QUEUE_ID_NIC_7_0: 8638 case GAUDI_QUEUE_ID_NIC_8_0: 8639 case GAUDI_QUEUE_ID_NIC_9_0: 8640 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2; 8641 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 + 8642 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8643 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8644 break; 8645 case GAUDI_QUEUE_ID_NIC_0_1: 8646 case GAUDI_QUEUE_ID_NIC_1_1: 8647 case GAUDI_QUEUE_ID_NIC_2_1: 8648 case GAUDI_QUEUE_ID_NIC_3_1: 8649 case GAUDI_QUEUE_ID_NIC_4_1: 8650 case GAUDI_QUEUE_ID_NIC_5_1: 8651 case GAUDI_QUEUE_ID_NIC_6_1: 8652 case GAUDI_QUEUE_ID_NIC_7_1: 8653 case GAUDI_QUEUE_ID_NIC_8_1: 8654 case GAUDI_QUEUE_ID_NIC_9_1: 8655 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2; 8656 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 + 8657 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8658 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8659 break; 8660 case GAUDI_QUEUE_ID_NIC_0_2: 8661 case GAUDI_QUEUE_ID_NIC_1_2: 8662 case GAUDI_QUEUE_ID_NIC_2_2: 8663 case 
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

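/*
 * A wait CB generated by gaudi_gen_wait_cb() is, roughly, the following
 * packet sequence appended at prop->size:
 *
 *   MSG_SHORT x3 - monitor payload address (lo/hi) and payload data
 *   MSG_SHORT x1 - ARM the monitor on the SOB group/mask/value
 *   FENCE     x1 - stall the stream until the monitor fires
 *
 * The return value is the new CB size; a return of 0 indicates the queue ID
 * could not be mapped to a fence register.
 */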
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
						hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}

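/*
 * Build the sync-object-to-engine map used by the state dump: for every
 * TPC, MME and DMA engine, read the sync object address configured in that
 * engine's CFG space and hash it (minus the CFG base) to the engine that
 * owns it. Values of 0 or 0xffffffff are treated as unused and skipped by
 * gaudi_add_sync_to_engine_map_entry() above.
 */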
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}

static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}

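/*
 * Dump the fence state of one engine's QMAN: the per-stream CP_STS
 * registers are sampled first, and for every stream with a fence in
 * progress the matching CP_FENCE<id>_CNT_<stream> and
 * CP_FENCE<id>_RDATA_<stream> addresses are reconstructed from the
 * engine's base offset and printed through hl_snprintf_resize().
 */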
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	/* One CP_STS register per queue/stream, matching the allocation above */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			/* value at the printed CNT address */
			fences[i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}

static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

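/*
 * gaudi_state_dump_init() wires the common state-dump machinery to the
 * Gaudi-specific tables: hash tables translating SOB and monitor IDs to
 * human-readable names, the register/offset properties array, the sync
 * manager names and the callbacks defined above.
 */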
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}

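/*
 * ASIC function table exposed to the common habanalabs driver. Entries set
 * to NULL (e.g. mmu_prefetch_cache_range, pb_print_security_errors,
 * get_dec_base_addr) are operations Gaudi does not implement.
 */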
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}

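/*
 * Illustrative sketch only (the actual call site lives in the common driver
 * code, not in this file): once the PCI device is identified as a Gaudi,
 * the common init path is expected to select this table roughly as follows:
 *
 *	switch (hdev->asic_type) {
 *	case ASIC_GAUDI:
 *		gaudi_set_asic_funcs(hdev);
 *		break;
 *	...
 *	}
 */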