// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudi2P.h"
#include "gaudi2_masks.h"
#include "../include/gaudi2/gaudi2_special_blocks.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v2_0.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_reg_map.h"
#include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
#include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>

#define GAUDI2_DMA_POOL_BLK_SIZE	SZ_256		/* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC	2000		/* 2000ms */

#define GAUDI2_RESET_POLL_TIMEOUT_USEC		500000		/* 500ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
#define GAUDI2_RESET_POLL_CNT			3
#define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
#define GAUDI2_CB_POOL_CB_CNT			512
#define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3

/*
 * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
 * and the code relies on that value (for array sizes etc.), we define another value
 * for the max faulty TPCs which reflects the cluster binning requirements.
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
#define MAX_FAULTY_XBARS			1
#define MAX_FAULTY_EDMAS			1
#define MAX_FAULTY_DECODERS			1

#define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
#define GAUDI2_DECODER_FULL_MASK		0x3FF

#define GAUDI2_NA_EVENT_CAUSE			0xFF
#define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE		31
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5

#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC			USEC_PER_SEC

#define IS_DMA_IDLE(dma_core_sts0)	\
	(!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))

#define IS_DMA_HALTED(dma_core_sts1)	\
	((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))

#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK			0x300
#define DEC_WORK_STATE_IDLE			0
#define DEC_WORK_STATE_PEND			3
#define IS_DEC_IDLE(dec_swreg15) \
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)
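
/*
 * Illustration only (not driver code): the IS_*_IDLE()/IS_DMA_HALTED() checks
 * above operate on the corresponding status registers as read at run time,
 * e.g. for a queue manager, where qm_glbl_sts0/qm_glbl_sts1/qm_cgm_sts hold
 * the QM GLBL_STS0/GLBL_STS1/CGM_STS register values read from the relevant
 * QM block:
 *
 *	is_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
 */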

/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
#define MMU_RANGE_INV_VA_LSB_SHIFT		12
#define MMU_RANGE_INV_VA_MSB_SHIFT		44
#define MMU_RANGE_INV_EN_SHIFT			0
#define MMU_RANGE_INV_ASID_EN_SHIFT		1
#define MMU_RANGE_INV_ASID_SHIFT		2

/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has
 * a 2-entry FIFO, and hence it is not enabled for it.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)

#define GAUDI2_MAX_STRING_LEN			64

#define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
						GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)

/* RAZWI initiator coordinates */
#define RAZWI_GET_AXUSER_XY(x) \
	((x & 0xF8001FF0) >> 4)

#define RAZWI_GET_AXUSER_LOW_XY(x) \
	((x & 0x00001FF0) >> 4)

#define RAZWI_INITIATOR_AXUER_L_X_SHIFT		0
#define RAZWI_INITIATOR_AXUER_L_X_MASK		0x1F
#define RAZWI_INITIATOR_AXUER_L_Y_SHIFT		5
#define RAZWI_INITIATOR_AXUER_L_Y_MASK		0xF

#define RAZWI_INITIATOR_AXUER_H_X_SHIFT		23
#define RAZWI_INITIATOR_AXUER_H_X_MASK		0x1F

#define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
	((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
	(((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))

#define RAZWI_INITIATOR_ID_X_HIGH(x) \
	(((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)

#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
	(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))

#define PSOC_RAZWI_ENG_STR_SIZE 128
#define PSOC_RAZWI_MAX_ENG_PER_RTR 5
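
/*
 * axuser_xy below holds the initiator coordinates packed by
 * RAZWI_INITIATOR_ID_X_Y(): low X in bits [4:0], low Y in bits [8:5] and
 * high X in bits [27:23]. For example, RAZWI_INITIATOR_ID_X_Y(2, 4, 0)
 * evaluates to (4 << 5) | 2 = 0x82.
 */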
struct gaudi2_razwi_info {
	u32 axuser_xy;
	u32 rtr_ctrl;
	u16 eng_id;
	char *eng_name;
};

static struct gaudi2_razwi_info common_razwi_info[] = {
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"},
	{RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
	{RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
	{RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
	{RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
	{RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
	{RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
	{RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
	{RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
	{RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
	{RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
	{RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
	{RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
	{RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
	{RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
	{RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
	{RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
	{RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
	{RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
	{RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
	{RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
	{RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
	{RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
	{RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
	{RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "PMMU"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "PCIE"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_KDMA, "KDMA"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
GAUDI2_ENGINE_ID_SIZE, "HMMU7"}, 311 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE, 312 GAUDI2_ENGINE_ID_SIZE, "HMMU8"}, 313 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE, 314 GAUDI2_ENGINE_ID_SIZE, "HMMU9"}, 315 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE, 316 GAUDI2_ENGINE_ID_SIZE, "HMMU10"}, 317 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE, 318 GAUDI2_ENGINE_ID_SIZE, "HMMU11"}, 319 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE, 320 GAUDI2_ENGINE_ID_SIZE, "HMMU12"}, 321 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE, 322 GAUDI2_ENGINE_ID_SIZE, "HMMU13"}, 323 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE, 324 GAUDI2_ENGINE_ID_SIZE, "HMMU14"}, 325 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE, 326 GAUDI2_ENGINE_ID_SIZE, "HMMU15"}, 327 {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE, 328 GAUDI2_ENGINE_ID_ROT_0, "ROT0"}, 329 {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE, 330 GAUDI2_ENGINE_ID_ROT_1, "ROT1"}, 331 {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE, 332 GAUDI2_ENGINE_ID_PSOC, "CPU"}, 333 {RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE, 334 GAUDI2_ENGINE_ID_PSOC, "PSOC"} 335 }; 336 337 static struct gaudi2_razwi_info mme_razwi_info[] = { 338 /* MME X high coordinate is N/A, hence using only low coordinates */ 339 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE, 340 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"}, 341 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE, 342 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"}, 343 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE, 344 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"}, 345 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE, 346 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"}, 347 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE, 348 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"}, 349 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE, 350 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"}, 351 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE, 352 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"}, 353 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE, 354 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"}, 355 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE, 356 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"}, 357 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE, 358 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"}, 359 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE, 360 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"}, 361 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE, 362 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"}, 363 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE, 364 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"}, 365 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE, 366 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"}, 367 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE, 368 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"}, 369 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE, 370 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"}, 371 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE, 372 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"}, 373 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE, 374 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"}, 375 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE, 376 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"}, 377 
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
};

enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN,
	LATENCY_WR_OUT_FIFO_OVERRUN,
};

enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND,
	HBW_AXI_DRAIN_IND
};

static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
	[HBM_ID0] = 0xFFFC,
	[HBM_ID1] = 0xFFCF,
	[HBM_ID2] = 0xF7F7,
	[HBM_ID3] = 0x7F7F,
	[HBM_ID4] = 0xFCFF,
	[HBM_ID5] = 0xCFFF,
};

static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
	[0] = HBM_ID0,
	[1] = HBM_ID1,
	[2] = HBM_ID4,
	[3] = HBM_ID5,
};

static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};
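
/*
 * Async event ID reported for each H/W queue; the four streams of every queue
 * map to the same QMAN event.
 */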
static const int gaudi2_qman_async_event_id[] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
};

static const int gaudi2_dma_core_async_event_id[] = {
	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};
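
/*
 * Human-readable names for the individual error/interrupt cause bits reported
 * by the various blocks, one string per cause bit. The array sizes match the
 * corresponding GAUDI2_NUM_OF_* defines above.
 */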
static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
	"qman sei intr",
	"arc sei intr"
};

static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
	"AXI_TERMINATOR WR",
	"AXI_TERMINATOR RD",
	"AXI SPLIT SEI Status"
};

static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
	"cbu_bresp_sei_intr_cause",
	"cbu_rresp_sei_intr_cause",
	"lbu_bresp_sei_intr_cause",
	"lbu_rresp_sei_intr_cause",
	"cbu_axi_split_intr_cause",
	"lbu_axi_split_intr_cause",
	"arc_ip_excptn_sei_intr_cause",
	"dmi_bresp_sei_intr_cause",
	"aux2apb_err_sei_intr_cause",
	"cfg_lbw_wr_terminated_intr_cause",
	"cfg_lbw_rd_terminated_intr_cause",
	"cfg_dccm_wr_terminated_intr_cause",
	"cfg_dccm_rd_terminated_intr_cause",
	"cfg_hbw_rd_terminated_intr_cause"
};

static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
	"msix_vcd_hbw_sei",
	"msix_l2c_hbw_sei",
	"msix_nrm_hbw_sei",
	"msix_abnrm_hbw_sei",
	"msix_vcd_lbw_sei",
	"msix_l2c_lbw_sei",
	"msix_nrm_lbw_sei",
	"msix_abnrm_lbw_sei",
	"apb_vcd_lbw_sei",
	"apb_l2c_lbw_sei",
	"apb_nrm_lbw_sei",
	"apb_abnrm_lbw_sei",
	"dec_sei",
	"dec_apb_sei",
	"trc_apb_sei",
	"lbw_mstr_if_sei",
	"axi_split_bresp_err_sei",
	"hbw_axi_wr_viol_sei",
	"hbw_axi_rd_viol_sei",
	"lbw_axi_wr_viol_sei",
	"lbw_axi_rd_viol_sei",
	"vcd_spi",
	"l2c_spi",
	"nrm_spi",
	"abnrm_spi",
};

static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"PQC L2H error"
};

static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
	"RSVD0",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"RSVD17",
	"CQ_WR_IFIFO_CI_ERR",
	"CQ_WR_CTL_CI_ERR",
	"ARC_CQF_RD_ERR",
	"ARC_CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_CTL_CI_ERR",
	"ARC_AXI_ERR",
	"CP_SWITCH_WDT_ERR"
};

static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
	"qm_axi_err",
	"qm_trace_fence_events",
	"qm_sw_err",
	"qm_cp_sw_stop",
	"lbw_mstr_rresp_err",
	"lbw_mstr_bresp_err",
	"lbw_msg_slverr",
	"hbw_msg_slverr",
	"wbc_slverr",
	"hbw_mstr_rresp_err",
	"hbw_mstr_bresp_err",
	"sb_resp_intr",
	"mrsb_resp_intr",
	"core_dw_status_0",
	"core_dw_status_1",
	"core_dw_status_2",
	"core_dw_status_3",
	"core_dw_status_4",
	"core_dw_status_5",
	"core_dw_status_6",
	"core_dw_status_7",
	"async_arc2cpu_sei_intr",
};

static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fma_fp_dst_nan",
	"tpc_spu_fma_fp_dst_inf",
	"tpc_spu_convert_fp_dst_nan",
	"tpc_spu_convert_fp_dst_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_convert_fp_dst_nan",
	"tpc_vpu_convert_fp_dst_inf",
	"tpc_vpu_fma_fp_dst_nan",
	"tpc_vpu_fma_fp_dst_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err",
	"st_unlock_already_locked",
	"invalid_lock_access",
	"LD_L protection violation",
	"ST_L protection violation",
	"D$ L0CS mismatch",
};

static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
	"agu_resp_intr",
	"qman_axi_err",
	"wap sei (wbc axi err)",
	"arc sei",
	"cfg access error",
	"qm_sw_err",
	"sbte_dbg_intr_0",
	"sbte_dbg_intr_1",
	"sbte_dbg_intr_2",
	"sbte_dbg_intr_3",
	"sbte_dbg_intr_4",
	"sbte_prtn_intr_0",
	"sbte_prtn_intr_1",
	"sbte_prtn_intr_2",
	"sbte_prtn_intr_3",
	"sbte_prtn_intr_4",
};

static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
	"i0",
	"i1",
	"i2",
	"i3",
	"i4",
};

static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
	"WBC ERR RESP_0",
	"WBC ERR RESP_1",
	"AP SOURCE POS INF",
	"AP SOURCE NEG INF",
	"AP SOURCE NAN",
	"AP RESULT POS INF",
	"AP RESULT NEG INF",
};

static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW Read returned with error RRESP",
	"HBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW/LBW Read returned with error RRESP",
	"HBW/LBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

struct gaudi2_sm_sei_cause_data {
	const char *cause_name;
	const char *log_name;
};

static const struct gaudi2_sm_sei_cause_data
gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
	{"calculated SO value overflow/underflow", "SOB ID"},
	{"payload address of monitor is not aligned to 4B", "monitor addr"},
	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
};

static const char * const
gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
	"AXI drain HBW",
	"AXI drain LBW",
};

static const char * const
gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
	"HBW error response",
	"LBW error response",
	"TLP is blocked by RR"
};
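
/*
 * QMAN block base address per H/W queue ID; the four streams of each queue
 * share the same QM block.
 */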
const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
};
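
/*
 * AUX register block base address per ARC CPU (scheduler ARCs, engine QMAN
 * ARCs and NIC QMAN ARCs).
 */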
mmDCORE1_MME_QM_ARC_AUX_BASE, 1262 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE, 1263 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE, 1264 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE, 1265 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE, 1266 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE, 1267 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE, 1268 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE, 1269 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE, 1270 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE, 1271 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE, 1272 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE, 1273 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE, 1274 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE, 1275 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE, 1276 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE, 1277 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE, 1278 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE, 1279 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE, 1280 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE, 1281 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE, 1282 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE, 1283 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE, 1284 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE, 1285 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE, 1286 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE, 1287 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE, 1288 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE, 1289 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE, 1290 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE, 1291 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE, 1292 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE, 1293 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE, 1294 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE, 1295 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE, 1296 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE, 1297 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE, 1298 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE, 1299 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE, 1300 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE, 1301 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE, 1302 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE, 1303 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE, 1304 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE, 1305 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE, 1306 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE, 1307 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE, 1308 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE, 1309 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE, 1310 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE, 1311 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE, 1312 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE, 1313 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE, 1314 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE, 1315 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE, 1316 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE, 1317 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE, 1318 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE, 1319 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE, 1320 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE, 1321 [CPU_ID_NIC_QMAN_ARC19] 
= mmNIC9_QM_ARC_AUX1_BASE, 1322 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE, 1323 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE, 1324 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE, 1325 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE, 1326 }; 1327 1328 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = { 1329 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE, 1330 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE, 1331 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE, 1332 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE, 1333 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE, 1334 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE, 1335 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE, 1336 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE, 1337 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE, 1338 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE, 1339 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE, 1340 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE, 1341 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE, 1342 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE, 1343 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE, 1344 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE, 1345 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE, 1346 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE, 1347 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE, 1348 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE, 1349 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE, 1350 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE, 1351 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE, 1352 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE, 1353 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE, 1354 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE, 1355 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE, 1356 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE, 1357 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE, 1358 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE, 1359 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE, 1360 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE, 1361 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE, 1362 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE, 1363 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE, 1364 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE, 1365 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE, 1366 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE, 1367 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE, 1368 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE, 1369 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE, 1370 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE, 1371 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE, 1372 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE, 1373 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE, 1374 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE, 1375 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE, 1376 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE, 1377 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE, 1378 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE, 1379 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE, 1380 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE, 1381 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE, 1382 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE, 1383 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE, 1384 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE, 1385 
[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE, 1386 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE, 1387 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE, 1388 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE, 1389 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE, 1390 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE, 1391 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE, 1392 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE, 1393 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE, 1394 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE, 1395 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE, 1396 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE, 1397 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE, 1398 }; 1399 1400 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = { 1401 [MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE, 1402 [MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE, 1403 [MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE, 1404 [MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE, 1405 }; 1406 1407 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = { 1408 [GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0, 1409 [GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0, 1410 [GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0, 1411 [GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0, 1412 [GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1, 1413 [GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1, 1414 [GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1, 1415 [GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1, 1416 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0, 1417 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0, 1418 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0, 1419 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0, 1420 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1, 1421 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1, 1422 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1, 1423 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1, 1424 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0, 1425 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0, 1426 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0, 1427 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0, 1428 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0, 1429 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0, 1430 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0, 1431 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0, 1432 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1, 1433 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1, 1434 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1, 1435 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1, 1436 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2, 1437 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2, 1438 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2, 1439 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2, 1440 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3, 1441 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3, 1442 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3, 1443 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3, 1444 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4, 1445 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4, 1446 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4, 1447 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4, 1448 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = 
CPU_ID_TPC_QMAN_ARC5, 1449 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5, 1450 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5, 1451 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5, 1452 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24, 1453 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24, 1454 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24, 1455 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24, 1456 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2, 1457 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2, 1458 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2, 1459 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2, 1460 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3, 1461 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3, 1462 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3, 1463 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3, 1464 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4, 1465 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4, 1466 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4, 1467 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4, 1468 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6, 1469 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6, 1470 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6, 1471 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6, 1472 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7, 1473 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7, 1474 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7, 1475 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7, 1476 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8, 1477 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8, 1478 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8, 1479 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8, 1480 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9, 1481 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9, 1482 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9, 1483 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9, 1484 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10, 1485 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10, 1486 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10, 1487 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10, 1488 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11, 1489 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11, 1490 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11, 1491 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11, 1492 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4, 1493 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4, 1494 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4, 1495 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4, 1496 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5, 1497 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5, 1498 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5, 1499 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5, 1500 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1, 1501 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1, 1502 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1, 1503 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1, 1504 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12, 1505 
[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12, 1506 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12, 1507 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12, 1508 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13, 1509 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13, 1510 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13, 1511 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13, 1512 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14, 1513 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14, 1514 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14, 1515 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14, 1516 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15, 1517 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15, 1518 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15, 1519 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15, 1520 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16, 1521 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16, 1522 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16, 1523 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16, 1524 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17, 1525 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17, 1526 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17, 1527 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17, 1528 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6, 1529 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6, 1530 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6, 1531 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6, 1532 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7, 1533 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7, 1534 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7, 1535 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7, 1536 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5, 1537 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5, 1538 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5, 1539 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5, 1540 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18, 1541 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18, 1542 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18, 1543 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18, 1544 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19, 1545 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19, 1546 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19, 1547 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19, 1548 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20, 1549 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20, 1550 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20, 1551 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20, 1552 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21, 1553 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21, 1554 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21, 1555 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21, 1556 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22, 1557 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22, 1558 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22, 1559 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22, 1560 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23, 1561 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = 
CPU_ID_TPC_QMAN_ARC23, 1562 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23, 1563 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23, 1564 [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0, 1565 [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0, 1566 [GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0, 1567 [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0, 1568 [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1, 1569 [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1, 1570 [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1, 1571 [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1, 1572 [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2, 1573 [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2, 1574 [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2, 1575 [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2, 1576 [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3, 1577 [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3, 1578 [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3, 1579 [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3, 1580 [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4, 1581 [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4, 1582 [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4, 1583 [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4, 1584 [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5, 1585 [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5, 1586 [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5, 1587 [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5, 1588 [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6, 1589 [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6, 1590 [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6, 1591 [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6, 1592 [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7, 1593 [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7, 1594 [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7, 1595 [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7, 1596 [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8, 1597 [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8, 1598 [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8, 1599 [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8, 1600 [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9, 1601 [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9, 1602 [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9, 1603 [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9, 1604 [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10, 1605 [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10, 1606 [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10, 1607 [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10, 1608 [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11, 1609 [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11, 1610 [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11, 1611 [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11, 1612 [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12, 1613 [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12, 1614 [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12, 1615 [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12, 1616 [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13, 1617 [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13, 1618 [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13, 1619 [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13, 1620 [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14, 1621 [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14, 1622 [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14, 1623 [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14, 1624 [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15, 1625 
[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15, 1626 [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15, 1627 [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15, 1628 [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16, 1629 [GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16, 1630 [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16, 1631 [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16, 1632 [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17, 1633 [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17, 1634 [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17, 1635 [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17, 1636 [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18, 1637 [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18, 1638 [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18, 1639 [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18, 1640 [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19, 1641 [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19, 1642 [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19, 1643 [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19, 1644 [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20, 1645 [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20, 1646 [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20, 1647 [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20, 1648 [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21, 1649 [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21, 1650 [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21, 1651 [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21, 1652 [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22, 1653 [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22, 1654 [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22, 1655 [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22, 1656 [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23, 1657 [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23, 1658 [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23, 1659 [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23, 1660 [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0, 1661 [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0, 1662 [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0, 1663 [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0, 1664 [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1, 1665 [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1, 1666 [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1, 1667 [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1 1668 }; 1669 1670 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = { 1671 [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE, 1672 [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE, 1673 [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE, 1674 [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE, 1675 [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE, 1676 [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE, 1677 [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE, 1678 [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE, 1679 [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE, 1680 [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE, 1681 [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE 1682 }; 1683 1684 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = { 1685 [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE, 1686 [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE, 1687 [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE, 1688 [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE 1689 }; 1690 1691 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = { 1692 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE, 1693 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE, 1694 [TPC_ID_DCORE0_TPC2] = 
mmDCORE0_TPC2_CFG_BASE, 1695 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE, 1696 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE, 1697 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE, 1698 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE, 1699 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE, 1700 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE, 1701 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE, 1702 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE, 1703 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE, 1704 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE, 1705 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE, 1706 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE, 1707 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE, 1708 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE, 1709 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE, 1710 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE, 1711 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE, 1712 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE, 1713 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE, 1714 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE, 1715 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE, 1716 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE, 1717 }; 1718 1719 static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = { 1720 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE, 1721 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE, 1722 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE, 1723 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE, 1724 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE, 1725 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE, 1726 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE, 1727 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE, 1728 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE, 1729 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE, 1730 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE, 1731 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE, 1732 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE, 1733 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE, 1734 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE, 1735 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE, 1736 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE, 1737 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE, 1738 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE, 1739 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE, 1740 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE, 1741 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE, 1742 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE, 1743 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE, 1744 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE, 1745 }; 1746 1747 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = { 1748 [ROTATOR_ID_0] = mmROT0_BASE, 1749 [ROTATOR_ID_1] = mmROT1_BASE 1750 }; 1751 1752 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = { 1753 [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0, 1754 [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0, 1755 [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0, 1756 [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0, 1757 [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0, 1758 [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0, 1759 [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0, 1760 [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0, 1761 [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0, 1762 [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0, 1763 [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0, 1764 
	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
};

static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
};

static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
	[GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
	/* the PCI TPC is placed last (mapped like HW) */
	[GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
};

static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
	[GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
	[GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
	[GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
	[GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
};

static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
	[GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
	[GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
	[GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
	[GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
	[GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
	[GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
	[GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
	[GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
	[GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
	[GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
[GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA, 1833 }; 1834 1835 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = { 1836 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0, 1837 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0, 1838 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0, 1839 GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0, 1840 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0, 1841 GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0, 1842 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0, 1843 GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0, 1844 }; 1845 1846 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = { 1847 "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal", 1848 "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal", 1849 "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal", 1850 "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal", 1851 "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal", 1852 "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal", 1853 "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal", 1854 "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal", 1855 "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal", 1856 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal" 1857 }; 1858 1859 enum rtr_id { 1860 DCORE0_RTR0, 1861 DCORE0_RTR1, 1862 DCORE0_RTR2, 1863 DCORE0_RTR3, 1864 DCORE0_RTR4, 1865 DCORE0_RTR5, 1866 DCORE0_RTR6, 1867 DCORE0_RTR7, 1868 DCORE1_RTR0, 1869 DCORE1_RTR1, 1870 DCORE1_RTR2, 1871 DCORE1_RTR3, 1872 DCORE1_RTR4, 1873 DCORE1_RTR5, 1874 DCORE1_RTR6, 1875 DCORE1_RTR7, 1876 DCORE2_RTR0, 1877 DCORE2_RTR1, 1878 DCORE2_RTR2, 1879 DCORE2_RTR3, 1880 DCORE2_RTR4, 1881 DCORE2_RTR5, 1882 DCORE2_RTR6, 1883 DCORE2_RTR7, 1884 DCORE3_RTR0, 1885 DCORE3_RTR1, 1886 DCORE3_RTR2, 1887 DCORE3_RTR3, 1888 DCORE3_RTR4, 1889 DCORE3_RTR5, 1890 DCORE3_RTR6, 1891 DCORE3_RTR7, 1892 }; 1893 1894 static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { 1895 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3, 1896 DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4, 1897 DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, 1898 DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, 1899 DCORE0_RTR0 1900 }; 1901 1902 static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { 1903 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, 1904 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, 1905 DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0, 1906 DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7, 1907 DCORE0_RTR0 1908 }; 1909 1910 static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = { 1911 DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0, 1912 DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0 1913 }; 1914 1915 static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = { 1916 DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1, 1917 DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0 1918 }; 1919 1920 static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = { 1921 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, 1922 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7 1923 }; 1924 1925 static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = { 1926 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, 1927 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, 
DCORE3_RTR7 1928 }; 1929 1930 static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = { 1931 mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE, 1932 mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1933 mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE, 1934 mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1935 mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1936 mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE, 1937 mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1938 mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE 1939 }; 1940 1941 static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = { 1942 DCORE0_RTR0, DCORE0_RTR0 1943 }; 1944 1945 static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = { 1946 DCORE0_RTR2, DCORE0_RTR2 1947 }; 1948 1949 static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = { 1950 DCORE2_RTR0, DCORE3_RTR7 1951 }; 1952 1953 static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = { 1954 DCORE2_RTR2, DCORE3_RTR5 1955 }; 1956 1957 struct mme_initiators_rtr_id { 1958 u32 wap0; 1959 u32 wap1; 1960 u32 write; 1961 u32 read; 1962 u32 sbte0; 1963 u32 sbte1; 1964 u32 sbte2; 1965 u32 sbte3; 1966 u32 sbte4; 1967 }; 1968 1969 enum mme_initiators { 1970 MME_WAP0 = 0, 1971 MME_WAP1, 1972 MME_WRITE, 1973 MME_READ, 1974 MME_SBTE0, 1975 MME_SBTE1, 1976 MME_SBTE2, 1977 MME_SBTE3, 1978 MME_SBTE4, 1979 MME_INITIATORS_MAX 1980 }; 1981 1982 static const struct mme_initiators_rtr_id 1983 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = { 1984 { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7, 1985 .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6}, 1986 { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8, 1987 .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8}, 1988 { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23, 1989 .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23}, 1990 { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30, 1991 .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28}, 1992 }; 1993 1994 enum razwi_event_sources { 1995 RAZWI_TPC, 1996 RAZWI_MME, 1997 RAZWI_EDMA, 1998 RAZWI_PDMA, 1999 RAZWI_NIC, 2000 RAZWI_DEC, 2001 RAZWI_ROT 2002 }; 2003 2004 struct hbm_mc_error_causes { 2005 u32 mask; 2006 char cause[50]; 2007 }; 2008 2009 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS; 2010 2011 /* Special blocks iterator is currently used to configure security protection bits, 2012 * and read global errors. Most HW blocks are addressable and those who aren't (N/A)- 2013 * must be skipped. Following configurations are commonly used for both PB config 2014 * and global error reading, since currently they both share the same settings. 2015 * Once it changes, we must remember to use separate configurations for either one. 
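 * (For reference, the skipped block types are listed in gaudi2_iterator_skip_block_types[]
 * and the skipped address ranges in gaudi2_iterator_skip_block_ranges[] right below.)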
2016 */ 2017 static int gaudi2_iterator_skip_block_types[] = { 2018 GAUDI2_BLOCK_TYPE_PLL, 2019 GAUDI2_BLOCK_TYPE_EU_BIST, 2020 GAUDI2_BLOCK_TYPE_HBM, 2021 GAUDI2_BLOCK_TYPE_XFT 2022 }; 2023 2024 static struct range gaudi2_iterator_skip_block_ranges[] = { 2025 /* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */ 2026 {mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE}, 2027 {mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE}, 2028 /* Skip all CPU blocks except for CPU_IF */ 2029 {mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE}, 2030 {mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE} 2031 }; 2032 2033 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = { 2034 {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"}, 2035 {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"}, 2036 {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"}, 2037 {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"}, 2038 {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"}, 2039 }; 2040 2041 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = { 2042 [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even", 2043 [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd", 2044 [HBM_SEI_READ_ERR] = "SEI read data error", 2045 [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error", 2046 [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted", 2047 [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail", 2048 [HBM_SEI_DFI] = "SEI DFI error", 2049 [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read", 2050 [HBM_SEI_BIST_FAIL] = "SEI BIST fail" 2051 }; 2052 2053 struct mmu_spi_sei_cause { 2054 char cause[50]; 2055 int clear_bit; 2056 }; 2057 2058 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = { 2059 {"page fault", 1}, /* INTERRUPT_CLR[1] */ 2060 {"page access", 1}, /* INTERRUPT_CLR[1] */ 2061 {"bypass ddr", 2}, /* INTERRUPT_CLR[2] */ 2062 {"multi hit", 2}, /* INTERRUPT_CLR[2] */ 2063 {"mmu rei0", -1}, /* no clear register bit */ 2064 {"mmu rei1", -1}, /* no clear register bit */ 2065 {"stlb rei0", -1}, /* no clear register bit */ 2066 {"stlb rei1", -1}, /* no clear register bit */ 2067 {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */ 2068 {"rr privileged read hit", 2}, /* INTERRUPT_CLR[2] */ 2069 {"rr secure write hit", 2}, /* INTERRUPT_CLR[2] */ 2070 {"rr secure read hit", 2}, /* INTERRUPT_CLR[2] */ 2071 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 2072 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 2073 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 2074 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 2075 {"slave error", 16}, /* INTERRUPT_CLR[16] */ 2076 {"dec error", 17}, /* INTERRUPT_CLR[17] */ 2077 {"burst fifo full", 2} /* INTERRUPT_CLR[2] */ 2078 }; 2079 2080 struct gaudi2_cache_invld_params { 2081 u64 start_va; 2082 u64 end_va; 2083 u32 inv_start_val; 2084 u32 flags; 2085 bool range_invalidation; 2086 }; 2087 2088 struct gaudi2_tpc_idle_data { 2089 struct engines_data *e; 2090 unsigned long *mask; 2091 bool *is_idle; 2092 const char *tpc_fmt; 2093 }; 2094 2095 struct gaudi2_tpc_mmu_data { 2096 u32 rw_asid; 2097 }; 2098 2099 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0}; 2100 2101 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val); 2102 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id); 2103 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id); 2104 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id); 2105 
static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id); 2106 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val); 2107 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size, 2108 bool is_memset); 2109 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 2110 struct engines_data *e); 2111 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 2112 struct engines_data *e); 2113 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 2114 struct engines_data *e); 2115 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr); 2116 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr); 2117 2118 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev) 2119 { 2120 2121 } 2122 2123 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev) 2124 { 2125 return sizeof(struct packet_msg_short); 2126 } 2127 2128 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev) 2129 { 2130 return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence); 2131 } 2132 2133 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) 2134 { 2135 struct asic_fixed_properties *prop = &hdev->asic_prop; 2136 int dcore, inst, tpc_seq; 2137 u32 offset; 2138 2139 /* init the return code */ 2140 ctx->rc = 0; 2141 2142 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) { 2143 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) { 2144 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 2145 2146 if (!(prop->tpc_enabled_mask & BIT(tpc_seq))) 2147 continue; 2148 2149 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst); 2150 2151 ctx->fn(hdev, dcore, inst, offset, ctx); 2152 if (ctx->rc) { 2153 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n", 2154 dcore, inst); 2155 return; 2156 } 2157 } 2158 } 2159 2160 if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6))) 2161 return; 2162 2163 /* special check for PCI TPC (DCORE0_TPC6) */ 2164 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1); 2165 ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx); 2166 if (ctx->rc) 2167 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n"); 2168 } 2169 2170 static bool gaudi2_host_phys_addr_valid(u64 addr) 2171 { 2172 if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1)) 2173 return true; 2174 2175 return false; 2176 } 2177 2178 static int set_number_of_functional_hbms(struct hl_device *hdev) 2179 { 2180 struct asic_fixed_properties *prop = &hdev->asic_prop; 2181 u8 faulty_hbms = hweight64(hdev->dram_binning); 2182 2183 /* check if all HBMs should be used */ 2184 if (!faulty_hbms) { 2185 dev_dbg(hdev->dev, "All HBM are in use (no binning)\n"); 2186 prop->num_functional_hbms = GAUDI2_HBM_NUM; 2187 return 0; 2188 } 2189 2190 /* 2191 * check for error condition in which number of binning 2192 * candidates is higher than the maximum supported by the 2193 * driver (in which case binning mask shall be ignored and driver will 2194 * set the default) 2195 */ 2196 if (faulty_hbms > MAX_FAULTY_HBMS) { 2197 dev_err(hdev->dev, 2198 "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n", 2199 MAX_FAULTY_HBMS, hdev->dram_binning); 2200 return -EINVAL; 2201 } 2202 2203 /* 2204 * by default, number of functional HBMs in Gaudi2 is always 2205 * GAUDI2_HBM_NUM - 1. 
2206 */ 2207 prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms; 2208 return 0; 2209 } 2210 2211 static int gaudi2_set_dram_properties(struct hl_device *hdev) 2212 { 2213 struct asic_fixed_properties *prop = &hdev->asic_prop; 2214 u32 basic_hbm_page_size; 2215 int rc; 2216 2217 rc = set_number_of_functional_hbms(hdev); 2218 if (rc) 2219 return -EINVAL; 2220 2221 /* 2222 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround 2223 * in which we are using x16 bigger page size to be able to populate the entire 2224 * HBM mappings in the TLB 2225 */ 2226 basic_hbm_page_size = prop->num_functional_hbms * SZ_8M; 2227 prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size; 2228 prop->device_mem_alloc_default_page_size = prop->dram_page_size; 2229 prop->dram_size = prop->num_functional_hbms * SZ_16G; 2230 prop->dram_base_address = DRAM_PHYS_BASE; 2231 prop->dram_end_address = prop->dram_base_address + prop->dram_size; 2232 prop->dram_supports_virtual_memory = true; 2233 2234 prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size; 2235 prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK; 2236 prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START; 2237 prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END; 2238 2239 /* since DRAM page size differs from DMMU page size we need to allocate 2240 * DRAM memory in units of dram_page size and mapping this memory in 2241 * units of DMMU page size. we overcome this size mismatch using a 2242 * scrambling routine which takes a DRAM page and converts it to a DMMU 2243 * page. 2244 * We therefore: 2245 * 1. partition the virtual address space to DRAM-page (whole) pages. 2246 * (suppose we get n such pages) 2247 * 2. limit the amount of virtual address space we got from 1 above to 2248 * a multiple of 64M as we don't want the scrambled address to cross 2249 * the DRAM virtual address space. 2250 * ( m = (n * DRAM_page_size) / DMMU_page_size). 2251 * 3. 
determine the end address accordingly
 *    end_addr = start_addr + m * 48M
 *
 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
 */
	prop->dmmu.start_addr = prop->dram_base_address +
			(prop->dram_page_size *
				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));

	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);

	return 0;
}

static int gaudi2_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hw_queue_properties *q_props;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
					GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	q_props = prop->hw_queues_props;

	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
		q_props[i].type = QUEUE_TYPE_HW;
		q_props[i].driver_only = 0;

		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
			q_props[i].supports_sync_stream = 0;
		} else {
			q_props[i].supports_sync_stream = 1;
			num_sync_stream_queues++;
		}

		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
	prop->host_base_address = HOST_PHYS_BASE_0;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
	prop->user_dec_intr_count = NUMBER_OF_DEC;
	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
	prop->completion_mode = HL_COMPLETION_MODE_CS;
	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->hints_range_reservation = true;

	prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;

	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;

	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
	prop->dmmu.page_size = PAGE_SIZE_1GB;
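	/*
	 * Illustrative sizing note (a sketch, not taken from HW documentation):
	 * assuming GAUDI2_HBM_NUM is 6 and no HBM binning,
	 * gaudi2_set_dram_properties() above computes
	 * dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * 6 * SZ_8M = 768MB,
	 * while the DMMU maps DRAM with the 1GB pages set here. This size
	 * mismatch is what the HBM MMU address scrambling routine compensates for.
	 */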
prop->dmmu.num_hops = MMU_ARCH_6_HOPS; 2343 prop->dmmu.last_mask = LAST_MASK; 2344 prop->dmmu.host_resident = 1; 2345 prop->dmmu.hop_table_size = prop->mmu_hop_table_size; 2346 prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 2347 2348 /* 2349 * this is done in order to be able to validate FW descriptor (i.e. validating that 2350 * the addresses and allocated space for FW image does not cross memory bounds). 2351 * for this reason we set the DRAM size to the minimum possible and later it will 2352 * be modified according to what reported in the cpucp info packet 2353 */ 2354 prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G; 2355 2356 hdev->pmmu_huge_range = true; 2357 prop->pmmu.host_resident = 1; 2358 prop->pmmu.num_hops = MMU_ARCH_6_HOPS; 2359 prop->pmmu.last_mask = LAST_MASK; 2360 prop->pmmu.hop_table_size = prop->mmu_hop_table_size; 2361 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 2362 2363 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START; 2364 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END; 2365 prop->hints_host_hpage_reserved_va_range.start_addr = 2366 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START; 2367 prop->hints_host_hpage_reserved_va_range.end_addr = 2368 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END; 2369 2370 if (PAGE_SIZE == SZ_64K) { 2371 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K; 2372 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K; 2373 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K; 2374 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K; 2375 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K; 2376 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K; 2377 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K; 2378 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K; 2379 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K; 2380 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K; 2381 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K; 2382 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K; 2383 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2384 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2385 prop->pmmu.page_size = PAGE_SIZE_64KB; 2386 2387 /* shifts and masks are the same in PMMU and HPMMU */ 2388 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2389 prop->pmmu_huge.page_size = PAGE_SIZE_16MB; 2390 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2391 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; 2392 } else { 2393 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K; 2394 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K; 2395 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K; 2396 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K; 2397 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K; 2398 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K; 2399 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K; 2400 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K; 2401 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K; 2402 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K; 2403 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K; 2404 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K; 2405 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2406 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2407 prop->pmmu.page_size = PAGE_SIZE_4KB; 2408 2409 /* shifts and masks are the same in PMMU and HPMMU */ 2410 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2411 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 2412 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2413 prop->pmmu_huge.end_addr = 
VA_HOST_SPACE_HPAGE_END; 2414 } 2415 2416 prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE; 2417 prop->num_engine_cores = CPU_ID_MAX; 2418 prop->cfg_size = CFG_SIZE; 2419 prop->max_asid = MAX_ASID; 2420 prop->num_of_events = GAUDI2_EVENT_SIZE; 2421 2422 prop->supports_engine_modes = true; 2423 2424 prop->dc_power_default = DC_POWER_DEFAULT; 2425 2426 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT; 2427 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE; 2428 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE; 2429 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 2430 2431 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2432 2433 prop->mme_master_slave_mode = 1; 2434 2435 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER + 2436 (num_sync_stream_queues * HL_RSVD_SOBS); 2437 2438 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER + 2439 (num_sync_stream_queues * HL_RSVD_MONS); 2440 2441 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST; 2442 prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT; 2443 prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE; 2444 2445 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER; 2446 2447 prop->fw_cpu_boot_dev_sts0_valid = false; 2448 prop->fw_cpu_boot_dev_sts1_valid = false; 2449 prop->hard_reset_done_by_fw = false; 2450 prop->gic_interrupts_enable = true; 2451 2452 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 2453 2454 prop->max_dec = NUMBER_OF_DEC; 2455 2456 prop->clk_pll_index = HL_GAUDI2_MME_PLL; 2457 2458 prop->dma_mask = 64; 2459 2460 prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0; 2461 2462 return 0; 2463 } 2464 2465 static int gaudi2_pci_bars_map(struct hl_device *hdev) 2466 { 2467 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"}; 2468 bool is_wc[3] = {false, false, true}; 2469 int rc; 2470 2471 rc = hl_pci_bars_map(hdev, name, is_wc); 2472 if (rc) 2473 return rc; 2474 2475 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR); 2476 2477 return 0; 2478 } 2479 2480 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 2481 { 2482 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2483 struct hl_inbound_pci_region pci_region; 2484 u64 old_addr = addr; 2485 int rc; 2486 2487 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr)) 2488 return old_addr; 2489 2490 if (hdev->asic_prop.iatu_done_by_fw) 2491 return U64_MAX; 2492 2493 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2494 pci_region.mode = PCI_BAR_MATCH_MODE; 2495 pci_region.bar = DRAM_BAR_ID; 2496 pci_region.addr = addr; 2497 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 2498 if (rc) 2499 return U64_MAX; 2500 2501 if (gaudi2) { 2502 old_addr = gaudi2->dram_bar_cur_addr; 2503 gaudi2->dram_bar_cur_addr = addr; 2504 } 2505 2506 return old_addr; 2507 } 2508 2509 static int gaudi2_init_iatu(struct hl_device *hdev) 2510 { 2511 struct hl_inbound_pci_region inbound_region; 2512 struct hl_outbound_pci_region outbound_region; 2513 u32 bar_addr_low, bar_addr_high; 2514 int rc; 2515 2516 if (hdev->asic_prop.iatu_done_by_fw) 2517 return 0; 2518 2519 /* Temporary inbound Region 0 - Bar 0 - Point to CFG 2520 * We must map this region in BAR match mode in order to 2521 * fetch BAR physical base address 2522 */ 2523 inbound_region.mode = PCI_BAR_MATCH_MODE; 2524 inbound_region.bar = SRAM_CFG_BAR_ID; 2525 /* Base address must be aligned to Bar size which is 256 MB */ 2526 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF; 2527 rc = 
hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2528 if (rc) 2529 return rc; 2530 2531 /* Fetch physical BAR address */ 2532 bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF); 2533 bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF; 2534 2535 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low; 2536 2537 /* Inbound Region 0 - Bar 0 - Point to CFG */ 2538 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2539 inbound_region.bar = SRAM_CFG_BAR_ID; 2540 inbound_region.offset_in_bar = 0; 2541 inbound_region.addr = STM_FLASH_BASE_ADDR; 2542 inbound_region.size = CFG_REGION_SIZE; 2543 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2544 if (rc) 2545 return rc; 2546 2547 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */ 2548 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2549 inbound_region.bar = SRAM_CFG_BAR_ID; 2550 inbound_region.offset_in_bar = CFG_REGION_SIZE; 2551 inbound_region.addr = BAR0_RSRVD_BASE_ADDR; 2552 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE; 2553 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 2554 if (rc) 2555 return rc; 2556 2557 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2558 inbound_region.mode = PCI_BAR_MATCH_MODE; 2559 inbound_region.bar = DRAM_BAR_ID; 2560 inbound_region.addr = DRAM_PHYS_BASE; 2561 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 2562 if (rc) 2563 return rc; 2564 2565 /* Outbound Region 0 - Point to Host */ 2566 outbound_region.addr = HOST_PHYS_BASE_0; 2567 outbound_region.size = HOST_PHYS_SIZE_0; 2568 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 2569 2570 return rc; 2571 } 2572 2573 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev) 2574 { 2575 return RREG32(mmHW_STATE); 2576 } 2577 2578 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev) 2579 { 2580 struct asic_fixed_properties *prop = &hdev->asic_prop; 2581 2582 /* 2583 * check for error condition in which number of binning candidates 2584 * is higher than the maximum supported by the driver 2585 */ 2586 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) { 2587 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n", 2588 MAX_CLUSTER_BINNING_FAULTY_TPCS, 2589 hdev->tpc_binning); 2590 return -EINVAL; 2591 } 2592 2593 prop->tpc_binning_mask = hdev->tpc_binning; 2594 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK; 2595 2596 return 0; 2597 } 2598 2599 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev) 2600 { 2601 struct asic_fixed_properties *prop = &hdev->asic_prop; 2602 struct hw_queue_properties *q_props = prop->hw_queues_props; 2603 u64 tpc_binning_mask; 2604 u8 subst_idx = 0; 2605 int i, rc; 2606 2607 rc = gaudi2_tpc_binning_init_prop(hdev); 2608 if (rc) 2609 return rc; 2610 2611 tpc_binning_mask = prop->tpc_binning_mask; 2612 2613 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) { 2614 u8 subst_seq, binned, qid_base; 2615 2616 if (tpc_binning_mask == 0) 2617 break; 2618 2619 if (subst_idx == 0) { 2620 subst_seq = TPC_ID_DCORE0_TPC6; 2621 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 2622 } else { 2623 subst_seq = TPC_ID_DCORE3_TPC5; 2624 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0; 2625 } 2626 2627 2628 /* clear bit from mask */ 2629 binned = __ffs(tpc_binning_mask); 2630 /* 2631 * Coverity complains about possible out-of-bound access in 2632 * clear_bit 2633 */ 2634 if (binned >= TPC_ID_SIZE) { 2635 dev_err(hdev->dev, 2636 "Invalid binned TPC (binning mask: %llx)\n", 2637 
tpc_binning_mask); 2638 return -EINVAL; 2639 } 2640 clear_bit(binned, (unsigned long *)&tpc_binning_mask); 2641 2642 /* also clear replacing TPC bit from enabled mask */ 2643 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask); 2644 2645 /* bin substite TPC's Qs */ 2646 q_props[qid_base].binned = 1; 2647 q_props[qid_base + 1].binned = 1; 2648 q_props[qid_base + 2].binned = 1; 2649 q_props[qid_base + 3].binned = 1; 2650 2651 subst_idx++; 2652 } 2653 2654 return 0; 2655 } 2656 2657 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev) 2658 { 2659 struct asic_fixed_properties *prop = &hdev->asic_prop; 2660 u8 num_faulty; 2661 2662 num_faulty = hweight32(hdev->decoder_binning); 2663 2664 /* 2665 * check for error condition in which number of binning candidates 2666 * is higher than the maximum supported by the driver 2667 */ 2668 if (num_faulty > MAX_FAULTY_DECODERS) { 2669 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n", 2670 hdev->decoder_binning); 2671 return -EINVAL; 2672 } 2673 2674 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK); 2675 2676 if (prop->decoder_binning_mask) 2677 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1)); 2678 else 2679 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK; 2680 2681 return 0; 2682 } 2683 2684 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev) 2685 { 2686 struct asic_fixed_properties *prop = &hdev->asic_prop; 2687 2688 /* check if we should override default binning */ 2689 if (!hdev->dram_binning) { 2690 prop->dram_binning_mask = 0; 2691 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK; 2692 return; 2693 } 2694 2695 /* set DRAM binning constraints */ 2696 prop->faulty_dram_cluster_map |= hdev->dram_binning; 2697 prop->dram_binning_mask = hdev->dram_binning; 2698 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5); 2699 } 2700 2701 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev) 2702 { 2703 struct asic_fixed_properties *prop = &hdev->asic_prop; 2704 struct hw_queue_properties *q_props; 2705 u8 seq, num_faulty; 2706 2707 num_faulty = hweight32(hdev->edma_binning); 2708 2709 /* 2710 * check for error condition in which number of binning candidates 2711 * is higher than the maximum supported by the driver 2712 */ 2713 if (num_faulty > MAX_FAULTY_EDMAS) { 2714 dev_err(hdev->dev, 2715 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n", 2716 hdev->edma_binning); 2717 return -EINVAL; 2718 } 2719 2720 if (!hdev->edma_binning) { 2721 prop->edma_binning_mask = 0; 2722 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK; 2723 return 0; 2724 } 2725 2726 seq = __ffs((unsigned long)hdev->edma_binning); 2727 2728 /* set binning constraints */ 2729 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]); 2730 prop->edma_binning_mask = hdev->edma_binning; 2731 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1); 2732 2733 /* bin substitute EDMA's queue */ 2734 q_props = prop->hw_queues_props; 2735 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1; 2736 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1; 2737 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1; 2738 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1; 2739 2740 return 0; 2741 } 2742 2743 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask) 2744 { 2745 struct asic_fixed_properties *prop = &hdev->asic_prop; 2746 u8 
num_faulty, seq; 2747 2748 /* check if we should override default binning */ 2749 if (!xbar_edge_iso_mask) { 2750 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK; 2751 return 0; 2752 } 2753 2754 /* 2755 * Note that it can be set to a value other than 0 only after the cpucp packet (i.e. 2756 * only the FW can set a redundancy value). For the user it'll always be 0. 2757 */ 2758 num_faulty = hweight32(xbar_edge_iso_mask); 2759 2760 /* 2761 * check for error condition in which number of binning candidates 2762 * is higher than the maximum supported by the driver 2763 */ 2764 if (num_faulty > MAX_FAULTY_XBARS) { 2765 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n", 2766 MAX_FAULTY_XBARS); 2767 return -EINVAL; 2768 } 2769 2770 seq = __ffs((unsigned long)xbar_edge_iso_mask); 2771 2772 /* set binning constraints */ 2773 prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]); 2774 prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK; 2775 2776 return 0; 2777 } 2778 2779 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask) 2780 { 2781 int rc; 2782 2783 /* 2784 * Mark all clusters as good; each component will "fail" a cluster 2785 * based on eFuse/user values. 2786 * If more than a single cluster is faulty, the chip is unusable. 2787 */ 2788 hdev->asic_prop.faulty_dram_cluster_map = 0; 2789 2790 gaudi2_set_dram_binning_masks(hdev); 2791 2792 rc = gaudi2_set_edma_binning_masks(hdev); 2793 if (rc) 2794 return rc; 2795 2796 rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask); 2797 if (rc) 2798 return rc; 2799 2800 2801 /* always initially set to full mask */ 2802 hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK; 2803 2804 return 0; 2805 } 2806 2807 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev) 2808 { 2809 struct asic_fixed_properties *prop = &hdev->asic_prop; 2810 int rc; 2811 2812 rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask); 2813 if (rc) 2814 return rc; 2815 2816 /* if we have DRAM binning reported by FW we should perform cluster config */ 2817 if (prop->faulty_dram_cluster_map) { 2818 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map); 2819 2820 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq]; 2821 } 2822 2823 return 0; 2824 } 2825 2826 static int gaudi2_set_binning_masks(struct hl_device *hdev) 2827 { 2828 int rc; 2829 2830 rc = gaudi2_set_cluster_binning_masks(hdev); 2831 if (rc) 2832 return rc; 2833 2834 rc = gaudi2_set_tpc_binning_masks(hdev); 2835 if (rc) 2836 return rc; 2837 2838 rc = gaudi2_set_dec_binning_masks(hdev); 2839 if (rc) 2840 return rc; 2841 2842 return 0; 2843 } 2844 2845 static int gaudi2_cpucp_info_get(struct hl_device *hdev) 2846 { 2847 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2848 struct asic_fixed_properties *prop = &hdev->asic_prop; 2849 long max_power; 2850 u64 dram_size; 2851 int rc; 2852 2853 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 2854 return 0; 2855 2856 /* No point in asking for this information again when not doing a hard reset, as the device 2857 * CPU hasn't been reset 2858 */ 2859 if (hdev->reset_info.in_compute_reset) 2860 return 0; 2861 2862 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 2863 mmCPU_BOOT_ERR1); 2864 if (rc) 2865 return rc; 2866 2867 dram_size = le64_to_cpu(prop->cpucp_info.dram_size); 2868 if (dram_size) { 2869 /* we can have either 5 or 6 HBMs. 
other values are invalid */ 2870 2871 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) && 2872 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) { 2873 dev_err(hdev->dev, 2874 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n", 2875 dram_size, prop->dram_size); 2876 dram_size = prop->dram_size; 2877 } 2878 2879 prop->dram_size = dram_size; 2880 prop->dram_end_address = prop->dram_base_address + dram_size; 2881 } 2882 2883 if (!strlen(prop->cpucp_info.card_name)) 2884 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2885 2886 /* Overwrite binning masks with the actual binning values from F/W */ 2887 hdev->dram_binning = prop->cpucp_info.dram_binning_mask; 2888 hdev->edma_binning = prop->cpucp_info.edma_binning_mask; 2889 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask); 2890 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask)); 2891 2892 dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n", 2893 hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning, 2894 hdev->decoder_binning); 2895 2896 /* 2897 * at this point the DRAM parameters need to be updated according to data obtained 2898 * from the FW 2899 */ 2900 rc = hdev->asic_funcs->set_dram_properties(hdev); 2901 if (rc) 2902 return rc; 2903 2904 rc = hdev->asic_funcs->set_binning_masks(hdev); 2905 if (rc) 2906 return rc; 2907 2908 max_power = hl_fw_get_max_power(hdev); 2909 if (max_power < 0) 2910 return max_power; 2911 2912 prop->max_power_default = (u64) max_power; 2913 2914 return 0; 2915 } 2916 2917 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev) 2918 { 2919 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2920 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS]; 2921 int rc; 2922 2923 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 2924 return 0; 2925 2926 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr); 2927 if (rc) 2928 return rc; 2929 2930 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3]; 2931 2932 return 0; 2933 } 2934 2935 static int gaudi2_early_init(struct hl_device *hdev) 2936 { 2937 struct asic_fixed_properties *prop = &hdev->asic_prop; 2938 struct pci_dev *pdev = hdev->pdev; 2939 resource_size_t pci_bar_size; 2940 int rc; 2941 2942 rc = gaudi2_set_fixed_properties(hdev); 2943 if (rc) 2944 return rc; 2945 2946 /* Check BAR sizes */ 2947 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID); 2948 2949 if (pci_bar_size != CFG_BAR_SIZE) { 2950 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 2951 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE); 2952 rc = -ENODEV; 2953 goto free_queue_props; 2954 } 2955 2956 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID); 2957 if (pci_bar_size != MSIX_BAR_SIZE) { 2958 dev_err(hdev->dev, "Not " HL_NAME "? 
BAR %d size %pa, expecting %llu\n", 2959 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE); 2960 rc = -ENODEV; 2961 goto free_queue_props; 2962 } 2963 2964 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID); 2965 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID); 2966 2967 /* 2968 * Only in pldm driver config iATU 2969 */ 2970 if (hdev->pldm) 2971 hdev->asic_prop.iatu_done_by_fw = false; 2972 else 2973 hdev->asic_prop.iatu_done_by_fw = true; 2974 2975 rc = hl_pci_init(hdev); 2976 if (rc) 2977 goto free_queue_props; 2978 2979 /* Before continuing in the initialization, we need to read the preboot 2980 * version to determine whether we run with a security-enabled firmware 2981 */ 2982 rc = hl_fw_read_preboot_status(hdev); 2983 if (rc) { 2984 if (hdev->reset_on_preboot_fail) 2985 /* we are already on failure flow, so don't check if hw_fini fails. */ 2986 hdev->asic_funcs->hw_fini(hdev, true, false); 2987 goto pci_fini; 2988 } 2989 2990 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { 2991 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); 2992 rc = hdev->asic_funcs->hw_fini(hdev, true, false); 2993 if (rc) { 2994 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc); 2995 goto pci_fini; 2996 } 2997 } 2998 2999 return 0; 3000 3001 pci_fini: 3002 hl_pci_fini(hdev); 3003 free_queue_props: 3004 kfree(hdev->asic_prop.hw_queues_props); 3005 return rc; 3006 } 3007 3008 static int gaudi2_early_fini(struct hl_device *hdev) 3009 { 3010 kfree(hdev->asic_prop.hw_queues_props); 3011 hl_pci_fini(hdev); 3012 3013 return 0; 3014 } 3015 3016 static bool gaudi2_is_arc_nic_owned(u64 arc_id) 3017 { 3018 switch (arc_id) { 3019 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 3020 return true; 3021 default: 3022 return false; 3023 } 3024 } 3025 3026 static bool gaudi2_is_arc_tpc_owned(u64 arc_id) 3027 { 3028 switch (arc_id) { 3029 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 3030 return true; 3031 default: 3032 return false; 3033 } 3034 } 3035 3036 static void gaudi2_init_arcs(struct hl_device *hdev) 3037 { 3038 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3039 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3040 u64 arc_id; 3041 u32 i; 3042 3043 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) { 3044 if (gaudi2_is_arc_enabled(hdev, i)) 3045 continue; 3046 3047 gaudi2_set_arc_id_cap(hdev, i); 3048 } 3049 3050 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 3051 if (!gaudi2_is_queue_enabled(hdev, i)) 3052 continue; 3053 3054 arc_id = gaudi2_queue_id_to_arc_id[i]; 3055 if (gaudi2_is_arc_enabled(hdev, arc_id)) 3056 continue; 3057 3058 if (gaudi2_is_arc_nic_owned(arc_id) && 3059 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0))) 3060 continue; 3061 3062 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized & 3063 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0))) 3064 continue; 3065 3066 gaudi2_set_arc_id_cap(hdev, arc_id); 3067 } 3068 3069 /* Fetch ARC scratchpad address */ 3070 hdev->asic_prop.engine_core_interrupt_reg_addr = 3071 CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl); 3072 } 3073 3074 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id) 3075 { 3076 u32 reg_base, reg_val; 3077 int rc; 3078 3079 switch (cpu_id) { 3080 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC3: 3081 /* Each ARC scheduler has 2 consecutive DCCM blocks */ 3082 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 3083 ARC_DCCM_BLOCK_SIZE * 2, true); 3084 if (rc) 3085 return rc; 3086 break; 3087 case CPU_ID_SCHED_ARC4: 3088 case CPU_ID_SCHED_ARC5: 3089 case CPU_ID_MME_QMAN_ARC0: 3090 case CPU_ID_MME_QMAN_ARC1: 3091 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 3092 3093 /* Scrub lower DCCM block */ 3094 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 3095 ARC_DCCM_BLOCK_SIZE, true); 3096 if (rc) 3097 return rc; 3098 3099 /* Switch to upper DCCM block */ 3100 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1); 3101 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 3102 3103 /* Scrub upper DCCM block */ 3104 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 3105 ARC_DCCM_BLOCK_SIZE, true); 3106 if (rc) 3107 return rc; 3108 3109 /* Switch to lower DCCM block */ 3110 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0); 3111 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 3112 break; 3113 default: 3114 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 3115 ARC_DCCM_BLOCK_SIZE, true); 3116 if (rc) 3117 return rc; 3118 } 3119 3120 return 0; 3121 } 3122 3123 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev) 3124 { 3125 u16 arc_id; 3126 int rc; 3127 3128 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) { 3129 if (!gaudi2_is_arc_enabled(hdev, arc_id)) 3130 continue; 3131 3132 rc = gaudi2_scrub_arc_dccm(hdev, arc_id); 3133 if (rc) 3134 return rc; 3135 } 3136 3137 return 0; 3138 } 3139 3140 static int gaudi2_late_init(struct hl_device *hdev) 3141 { 3142 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3143 int rc; 3144 3145 hdev->asic_prop.supports_advanced_cpucp_rc = true; 3146 3147 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 3148 gaudi2->virt_msix_db_dma_addr); 3149 if (rc) { 3150 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 3151 return rc; 3152 } 3153 3154 rc = gaudi2_fetch_psoc_frequency(hdev); 3155 if (rc) { 3156 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 3157 goto disable_pci_access; 3158 } 3159 3160 gaudi2_init_arcs(hdev); 3161 3162 rc = gaudi2_scrub_arcs_dccm(hdev); 3163 if (rc) { 3164 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n"); 3165 goto disable_pci_access; 3166 } 3167 3168 gaudi2_init_security(hdev); 3169 3170 return 0; 3171 3172 disable_pci_access: 3173 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 3174 3175 return rc; 3176 } 3177 3178 static void gaudi2_late_fini(struct hl_device *hdev) 3179 { 3180 hl_hwmon_release_resources(hdev); 3181 } 3182 3183 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx) 3184 { 3185 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 3186 3187 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE); 3188 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE); 3189 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE); 3190 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE); 3191 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE); 3192 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE); 3193 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], 
mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE); 3194 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE); 3195 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE); 3196 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE); 3197 } 3198 3199 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev) 3200 { 3201 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3202 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 3203 u32 block_size, umr_start_idx, num_umr_blocks; 3204 int i; 3205 3206 for (i = 0 ; i < NUM_ARC_CPUS ; i++) { 3207 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3) 3208 block_size = ARC_DCCM_BLOCK_SIZE * 2; 3209 else 3210 block_size = ARC_DCCM_BLOCK_SIZE; 3211 3212 blocks[i].address = gaudi2_arc_dccm_bases[i]; 3213 blocks[i].size = block_size; 3214 } 3215 3216 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE; 3217 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE; 3218 3219 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE; 3220 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE; 3221 3222 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE; 3223 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE; 3224 3225 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE; 3226 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE; 3227 3228 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE; 3229 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE; 3230 3231 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE; 3232 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE; 3233 3234 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE; 3235 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE; 3236 3237 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE; 3238 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE; 3239 3240 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS; 3241 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS; 3242 for (i = 0 ; i < num_umr_blocks ; i++) { 3243 u8 nic_id, umr_block_id; 3244 3245 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS; 3246 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS; 3247 3248 blocks[umr_start_idx + i].address = 3249 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE + 3250 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET + 3251 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET + 3252 umr_block_id * NIC_UMR_OFFSET; 3253 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE; 3254 } 3255 3256 /* Expose decoder HW configuration block to user */ 3257 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX); 3258 3259 for (i = 1; i < NUM_OF_DCORES; ++i) { 3260 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE; 3261 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE; 3262 3263 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address = 3264 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET; 3265 3266 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address = 3267 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET; 3268 } 3269 } 3270 3271 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 3272 { 3273 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 3274 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}; 3275 int i, j, rc = 0; 3276 3277 /* The device ARC works with 32-bits addresses, and because there is a single HW register 3278 * that holds the extension bits (49..28), these bits must be 
identical in all the allocated 3279 * range. 3280 */ 3281 3282 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 3283 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 3284 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO); 3285 if (!virt_addr_arr[i]) { 3286 rc = -ENOMEM; 3287 goto free_dma_mem_arr; 3288 } 3289 3290 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 3291 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr)) 3292 break; 3293 } 3294 3295 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) { 3296 dev_err(hdev->dev, 3297 "MSB of ARC accessible DMA memory are not identical in all range\n"); 3298 rc = -EFAULT; 3299 goto free_dma_mem_arr; 3300 } 3301 3302 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 3303 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 3304 3305 free_dma_mem_arr: 3306 for (j = 0 ; j < i ; j++) 3307 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 3308 dma_addr_arr[j]); 3309 3310 return rc; 3311 } 3312 3313 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev) 3314 { 3315 struct asic_fixed_properties *prop = &hdev->asic_prop; 3316 struct pci_mem_region *region; 3317 3318 /* CFG */ 3319 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 3320 region->region_base = CFG_BASE; 3321 region->region_size = CFG_SIZE; 3322 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR; 3323 region->bar_size = CFG_BAR_SIZE; 3324 region->bar_id = SRAM_CFG_BAR_ID; 3325 region->used = 1; 3326 3327 /* SRAM */ 3328 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 3329 region->region_base = SRAM_BASE_ADDR; 3330 region->region_size = SRAM_SIZE; 3331 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE; 3332 region->bar_size = CFG_BAR_SIZE; 3333 region->bar_id = SRAM_CFG_BAR_ID; 3334 region->used = 1; 3335 3336 /* DRAM */ 3337 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 3338 region->region_base = DRAM_PHYS_BASE; 3339 region->region_size = hdev->asic_prop.dram_size; 3340 region->offset_in_bar = 0; 3341 region->bar_size = prop->dram_pci_bar_size; 3342 region->bar_id = DRAM_BAR_ID; 3343 region->used = 1; 3344 } 3345 3346 static void gaudi2_user_interrupt_setup(struct hl_device *hdev) 3347 { 3348 struct asic_fixed_properties *prop = &hdev->asic_prop; 3349 int i, j, k; 3350 3351 /* Initialize TPC interrupt */ 3352 HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC); 3353 3354 /* Initialize unexpected error interrupt */ 3355 HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0, 3356 HL_USR_INTERRUPT_UNEXPECTED); 3357 3358 /* Initialize common user CQ interrupt */ 3359 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev, 3360 HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ); 3361 3362 /* Initialize common decoder interrupt */ 3363 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev, 3364 HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER); 3365 3366 /* User interrupts structure holds both decoder and user interrupts from various engines. 3367 * We first initialize the decoder interrupts and then we add the user interrupts. 3368 * The only limitation is that the last decoder interrupt id must be smaller 3369 * then GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time. 
3370 */ 3371 3372 /* Initialize decoder interrupts, expose only normal interrupts, 3373 * error interrupts to be handled by driver 3374 */ 3375 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM; 3376 i += 2, j++) 3377 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, 3378 HL_USR_INTERRUPT_DECODER); 3379 3380 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++) 3381 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ); 3382 } 3383 3384 static inline int gaudi2_get_non_zero_random_int(void) 3385 { 3386 int rand = get_random_u32(); 3387 3388 return rand ? rand : 1; 3389 } 3390 3391 static void gaudi2_special_blocks_free(struct hl_device *hdev) 3392 { 3393 struct asic_fixed_properties *prop = &hdev->asic_prop; 3394 struct hl_skip_blocks_cfg *skip_special_blocks_cfg = 3395 &prop->skip_special_blocks_cfg; 3396 3397 kfree(prop->special_blocks); 3398 kfree(skip_special_blocks_cfg->block_types); 3399 kfree(skip_special_blocks_cfg->block_ranges); 3400 } 3401 3402 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev) 3403 { 3404 gaudi2_special_blocks_free(hdev); 3405 } 3406 3407 static bool gaudi2_special_block_skip(struct hl_device *hdev, 3408 struct hl_special_blocks_cfg *special_blocks_cfg, 3409 u32 blk_idx, u32 major, u32 minor, u32 sub_minor) 3410 { 3411 return false; 3412 } 3413 3414 static int gaudi2_special_blocks_config(struct hl_device *hdev) 3415 { 3416 struct asic_fixed_properties *prop = &hdev->asic_prop; 3417 int i, rc; 3418 3419 /* Configure Special blocks */ 3420 prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE; 3421 prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks); 3422 prop->special_blocks = kmalloc_array(prop->num_of_special_blocks, 3423 sizeof(*prop->special_blocks), GFP_KERNEL); 3424 if (!prop->special_blocks) 3425 return -ENOMEM; 3426 3427 for (i = 0 ; i < prop->num_of_special_blocks ; i++) 3428 memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i], 3429 sizeof(*prop->special_blocks)); 3430 3431 /* Configure when to skip Special blocks */ 3432 memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg)); 3433 prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip; 3434 3435 if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) { 3436 prop->skip_special_blocks_cfg.block_types = 3437 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types), 3438 sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL); 3439 if (!prop->skip_special_blocks_cfg.block_types) { 3440 rc = -ENOMEM; 3441 goto free_special_blocks; 3442 } 3443 3444 memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types, 3445 sizeof(gaudi2_iterator_skip_block_types)); 3446 3447 prop->skip_special_blocks_cfg.block_types_len = 3448 ARRAY_SIZE(gaudi2_iterator_skip_block_types); 3449 } 3450 3451 if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) { 3452 prop->skip_special_blocks_cfg.block_ranges = 3453 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges), 3454 sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL); 3455 if (!prop->skip_special_blocks_cfg.block_ranges) { 3456 rc = -ENOMEM; 3457 goto free_skip_special_blocks_types; 3458 } 3459 3460 for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++) 3461 memcpy(&prop->skip_special_blocks_cfg.block_ranges[i], 3462 &gaudi2_iterator_skip_block_ranges[i], 3463 sizeof(struct range)); 3464 3465 prop->skip_special_blocks_cfg.block_ranges_len = 3466 
ARRAY_SIZE(gaudi2_iterator_skip_block_ranges); 3467 } 3468 3469 return 0; 3470 3471 free_skip_special_blocks_types: 3472 kfree(prop->skip_special_blocks_cfg.block_types); 3473 free_special_blocks: 3474 kfree(prop->special_blocks); 3475 3476 return rc; 3477 } 3478 3479 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev) 3480 { 3481 return gaudi2_special_blocks_config(hdev); 3482 } 3483 3484 static void gaudi2_test_queues_msgs_free(struct hl_device *hdev) 3485 { 3486 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3487 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info; 3488 int i; 3489 3490 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) { 3491 /* bail-out if this is an allocation failure point */ 3492 if (!msg_info[i].kern_addr) 3493 break; 3494 3495 hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr); 3496 msg_info[i].kern_addr = NULL; 3497 } 3498 } 3499 3500 static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev) 3501 { 3502 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3503 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info; 3504 int i, rc; 3505 3506 /* allocate a message-short buf for each Q we intend to test */ 3507 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) { 3508 msg_info[i].kern_addr = 3509 (void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short), 3510 GFP_KERNEL, &msg_info[i].dma_addr); 3511 if (!msg_info[i].kern_addr) { 3512 dev_err(hdev->dev, 3513 "Failed to allocate dma memory for H/W queue %d testing\n", i); 3514 rc = -ENOMEM; 3515 goto err_exit; 3516 } 3517 } 3518 3519 return 0; 3520 3521 err_exit: 3522 gaudi2_test_queues_msgs_free(hdev); 3523 return rc; 3524 } 3525 3526 static int gaudi2_sw_init(struct hl_device *hdev) 3527 { 3528 struct asic_fixed_properties *prop = &hdev->asic_prop; 3529 struct gaudi2_device *gaudi2; 3530 int i, rc; 3531 3532 /* Allocate device structure */ 3533 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL); 3534 if (!gaudi2) 3535 return -ENOMEM; 3536 3537 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) { 3538 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid) 3539 continue; 3540 3541 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) { 3542 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n", 3543 GAUDI2_EVENT_SIZE); 3544 rc = -EINVAL; 3545 goto free_gaudi2_device; 3546 } 3547 3548 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id; 3549 } 3550 3551 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) 3552 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int(); 3553 3554 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get; 3555 3556 hdev->asic_specific = gaudi2; 3557 3558 /* Create DMA pool for small allocations. 
3559 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped 3560 * PI/CI registers allocated from this pool have this restriction 3561 */ 3562 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, 3563 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0); 3564 if (!hdev->dma_pool) { 3565 dev_err(hdev->dev, "failed to create DMA pool\n"); 3566 rc = -ENOMEM; 3567 goto free_gaudi2_device; 3568 } 3569 3570 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev); 3571 if (rc) 3572 goto free_dma_pool; 3573 3574 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 3575 if (!hdev->cpu_accessible_dma_pool) { 3576 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n"); 3577 rc = -ENOMEM; 3578 goto free_cpu_dma_mem; 3579 } 3580 3581 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem, 3582 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 3583 if (rc) { 3584 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n"); 3585 rc = -EFAULT; 3586 goto free_cpu_accessible_dma_pool; 3587 } 3588 3589 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size, 3590 &gaudi2->virt_msix_db_dma_addr); 3591 if (!gaudi2->virt_msix_db_cpu_addr) { 3592 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n"); 3593 rc = -ENOMEM; 3594 goto free_cpu_accessible_dma_pool; 3595 } 3596 3597 spin_lock_init(&gaudi2->hw_queues_lock); 3598 3599 gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE, 3600 &gaudi2->scratchpad_bus_address, 3601 GFP_KERNEL | __GFP_ZERO); 3602 if (!gaudi2->scratchpad_kernel_address) { 3603 rc = -ENOMEM; 3604 goto free_virt_msix_db_mem; 3605 } 3606 3607 gaudi2_user_mapped_blocks_init(hdev); 3608 3609 /* Initialize user interrupts */ 3610 gaudi2_user_interrupt_setup(hdev); 3611 3612 hdev->supports_coresight = true; 3613 hdev->supports_sync_stream = true; 3614 hdev->supports_cb_mapping = true; 3615 hdev->supports_wait_for_multi_cs = false; 3616 3617 prop->supports_compute_reset = true; 3618 3619 hdev->asic_funcs->set_pci_memory_regions(hdev); 3620 3621 rc = gaudi2_special_blocks_iterator_config(hdev); 3622 if (rc) 3623 goto free_scratchpad_mem; 3624 3625 rc = gaudi2_test_queues_msgs_alloc(hdev); 3626 if (rc) 3627 goto special_blocks_free; 3628 3629 return 0; 3630 3631 special_blocks_free: 3632 gaudi2_special_blocks_iterator_free(hdev); 3633 free_scratchpad_mem: 3634 hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address, 3635 gaudi2->scratchpad_bus_address); 3636 free_virt_msix_db_mem: 3637 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); 3638 free_cpu_accessible_dma_pool: 3639 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 3640 free_cpu_dma_mem: 3641 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 3642 hdev->cpu_accessible_dma_address); 3643 free_dma_pool: 3644 dma_pool_destroy(hdev->dma_pool); 3645 free_gaudi2_device: 3646 kfree(gaudi2); 3647 return rc; 3648 } 3649 3650 static int gaudi2_sw_fini(struct hl_device *hdev) 3651 { 3652 struct asic_fixed_properties *prop = &hdev->asic_prop; 3653 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3654 3655 gaudi2_test_queues_msgs_free(hdev); 3656 3657 gaudi2_special_blocks_iterator_free(hdev); 3658 3659 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); 3660 3661 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 3662 3663 hl_asic_dma_free_coherent(hdev, 
HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 3664 hdev->cpu_accessible_dma_address); 3665 3666 hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address, 3667 gaudi2->scratchpad_bus_address); 3668 3669 dma_pool_destroy(hdev->dma_pool); 3670 3671 kfree(gaudi2); 3672 3673 return 0; 3674 } 3675 3676 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base) 3677 { 3678 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP | 3679 QM_GLBL_CFG1_CQF_STOP | 3680 QM_GLBL_CFG1_CP_STOP); 3681 3682 /* also stop the ARC */ 3683 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP); 3684 } 3685 3686 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base) 3687 { 3688 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH | 3689 QM_GLBL_CFG1_CQF_FLUSH | 3690 QM_GLBL_CFG1_CP_FLUSH); 3691 } 3692 3693 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base) 3694 { 3695 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH); 3696 } 3697 3698 /** 3699 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters 3700 * 3701 * @hdev: pointer to the habanalabs device structure 3702 * @queue_id: queue whose fence counters should be cleared 3703 * @skip_fence: if true, set the maximum fence value in all fence counters to avoid 3704 * getting stuck on any fence value. Otherwise set all fence 3705 * counters to 0 (standard clear of fence counters) 3706 */ 3707 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id, 3708 bool skip_fence) 3709 { 3710 u32 size, reg_base; 3711 u32 addr, val; 3712 3713 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3714 3715 addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET; 3716 size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0; 3717 3718 /* 3719 * To make sure that a QM which is stuck on a fence will be released, 3720 * the fence counter should be set to a value higher than the one the 3721 * QM is waiting for. To comply with any possible fence value, we set 3722 * the maximum fence value in all counters 3723 */ 3724 val = skip_fence ? 
U32_MAX : 0; 3725 gaudi2_memset_device_lbw(hdev, addr, size, val); 3726 } 3727 3728 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id) 3729 { 3730 u32 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3731 3732 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true); 3733 gaudi2_flush_qman_common(hdev, reg_base); 3734 gaudi2_flush_qman_arc_common(hdev, reg_base); 3735 } 3736 3737 static void gaudi2_stop_dma_qmans(struct hl_device *hdev) 3738 { 3739 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3740 int dcore, inst; 3741 3742 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3743 goto stop_edma_qmans; 3744 3745 /* Stop CPs of PDMA QMANs */ 3746 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE); 3747 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE); 3748 3749 stop_edma_qmans: 3750 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3751 return; 3752 3753 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3754 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3755 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3756 u32 qm_base; 3757 3758 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3759 continue; 3760 3761 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3762 inst * DCORE_EDMA_OFFSET; 3763 3764 /* Stop CPs of EDMA QMANs */ 3765 gaudi2_stop_qman_common(hdev, qm_base); 3766 } 3767 } 3768 } 3769 3770 static void gaudi2_stop_mme_qmans(struct hl_device *hdev) 3771 { 3772 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3773 u32 offset, i; 3774 3775 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3776 3777 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 3778 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))) 3779 continue; 3780 3781 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3782 } 3783 } 3784 3785 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev) 3786 { 3787 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3788 u32 reg_base; 3789 int i; 3790 3791 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3792 return; 3793 3794 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3795 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3796 continue; 3797 3798 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3799 gaudi2_stop_qman_common(hdev, reg_base); 3800 } 3801 } 3802 3803 static void gaudi2_stop_rot_qmans(struct hl_device *hdev) 3804 { 3805 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3806 u32 reg_base; 3807 int i; 3808 3809 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3810 return; 3811 3812 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3813 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3814 continue; 3815 3816 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 3817 gaudi2_stop_qman_common(hdev, reg_base); 3818 } 3819 } 3820 3821 static void gaudi2_stop_nic_qmans(struct hl_device *hdev) 3822 { 3823 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3824 u32 reg_base, queue_id; 3825 int i; 3826 3827 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3828 return; 3829 3830 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3831 3832 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3833 if (!(hdev->nic_ports_mask & BIT(i))) 3834 continue; 3835 3836 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3837 gaudi2_stop_qman_common(hdev, reg_base); 3838 } 3839 } 3840 3841 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base) 3842 { 3843 u32 reg_val; 3844 3845 reg_val = 
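/*
 * Descriptive note: a DMA core is stalled by setting the HALT bit in its
 * CORE_CFG_1 register. The PDMA0 field definition is reused for every core,
 * since gaudi2_dma_stall() below calls this helper for the PDMA cores as well
 * as for each enabled EDMA core. For example, for PDMA0 the write below
 * amounts to:
 *	WREG32(mmPDMA0_CORE_BASE + DMA_CORE_CFG_1_OFFSET,
 *	       FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1));
 */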
FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1); 3846 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val); 3847 } 3848 3849 static void gaudi2_dma_stall(struct hl_device *hdev) 3850 { 3851 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3852 int dcore, inst; 3853 3854 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3855 goto stall_edma; 3856 3857 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE); 3858 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE); 3859 3860 stall_edma: 3861 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3862 return; 3863 3864 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3865 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3866 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3867 u32 core_base; 3868 3869 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3870 continue; 3871 3872 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET + 3873 inst * DCORE_EDMA_OFFSET; 3874 3875 /* Stall CPs of EDMA QMANs */ 3876 gaudi2_stall_dma_common(hdev, core_base); 3877 } 3878 } 3879 } 3880 3881 static void gaudi2_mme_stall(struct hl_device *hdev) 3882 { 3883 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3884 u32 offset, i; 3885 3886 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL; 3887 3888 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3889 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3890 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1); 3891 } 3892 3893 static void gaudi2_tpc_stall(struct hl_device *hdev) 3894 { 3895 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3896 u32 reg_base; 3897 int i; 3898 3899 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3900 return; 3901 3902 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3903 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3904 continue; 3905 3906 reg_base = gaudi2_tpc_cfg_blocks_bases[i]; 3907 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1); 3908 } 3909 } 3910 3911 static void gaudi2_rotator_stall(struct hl_device *hdev) 3912 { 3913 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3914 u32 reg_val; 3915 int i; 3916 3917 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3918 return; 3919 3920 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) | 3921 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) | 3922 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1); 3923 3924 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3925 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3926 continue; 3927 3928 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val); 3929 } 3930 } 3931 3932 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base) 3933 { 3934 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0); 3935 } 3936 3937 static void gaudi2_disable_dma_qmans(struct hl_device *hdev) 3938 { 3939 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3940 int dcore, inst; 3941 3942 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3943 goto stop_edma_qmans; 3944 3945 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE); 3946 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE); 3947 3948 stop_edma_qmans: 3949 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3950 return; 3951 3952 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3953 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3954 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3955 u32 qm_base; 3956 3957 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3958 continue; 3959 3960 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3961 inst * 
DCORE_EDMA_OFFSET; 3962 3963 /* Disable CPs of EDMA QMANs */ 3964 gaudi2_disable_qman_common(hdev, qm_base); 3965 } 3966 } 3967 } 3968 3969 static void gaudi2_disable_mme_qmans(struct hl_device *hdev) 3970 { 3971 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3972 u32 offset, i; 3973 3974 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3975 3976 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3977 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3978 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3979 } 3980 3981 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev) 3982 { 3983 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3984 u32 reg_base; 3985 int i; 3986 3987 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3988 return; 3989 3990 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3991 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3992 continue; 3993 3994 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3995 gaudi2_disable_qman_common(hdev, reg_base); 3996 } 3997 } 3998 3999 static void gaudi2_disable_rot_qmans(struct hl_device *hdev) 4000 { 4001 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4002 u32 reg_base; 4003 int i; 4004 4005 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 4006 return; 4007 4008 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 4009 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 4010 continue; 4011 4012 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 4013 gaudi2_disable_qman_common(hdev, reg_base); 4014 } 4015 } 4016 4017 static void gaudi2_disable_nic_qmans(struct hl_device *hdev) 4018 { 4019 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4020 u32 reg_base, queue_id; 4021 int i; 4022 4023 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 4024 return; 4025 4026 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 4027 4028 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 4029 if (!(hdev->nic_ports_mask & BIT(i))) 4030 continue; 4031 4032 reg_base = gaudi2_qm_blocks_bases[queue_id]; 4033 gaudi2_disable_qman_common(hdev, reg_base); 4034 } 4035 } 4036 4037 static void gaudi2_enable_timestamp(struct hl_device *hdev) 4038 { 4039 /* Disable the timestamp counter */ 4040 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 4041 4042 /* Zero the lower/upper parts of the 64-bit counter */ 4043 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0); 4044 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0); 4045 4046 /* Enable the counter */ 4047 WREG32(mmPSOC_TIMESTAMP_BASE, 1); 4048 } 4049 4050 static void gaudi2_disable_timestamp(struct hl_device *hdev) 4051 { 4052 /* Disable the timestamp counter */ 4053 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 4054 } 4055 4056 static const char *gaudi2_irq_name(u16 irq_number) 4057 { 4058 switch (irq_number) { 4059 case GAUDI2_IRQ_NUM_EVENT_QUEUE: 4060 return "gaudi2 cpu eq"; 4061 case GAUDI2_IRQ_NUM_COMPLETION: 4062 return "gaudi2 completion"; 4063 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM: 4064 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM]; 4065 case GAUDI2_IRQ_NUM_TPC_ASSERT: 4066 return "gaudi2 tpc assert"; 4067 case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR: 4068 return "gaudi2 unexpected error"; 4069 case GAUDI2_IRQ_NUM_USER_FIRST ... 
GAUDI2_IRQ_NUM_USER_LAST: 4070 return "gaudi2 user completion"; 4071 default: 4072 return "invalid"; 4073 } 4074 } 4075 4076 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num) 4077 { 4078 int i, irq, relative_idx; 4079 struct hl_dec *dec; 4080 4081 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) { 4082 irq = pci_irq_vector(hdev->pdev, i); 4083 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 4084 4085 dec = hdev->dec + relative_idx / 2; 4086 4087 /* We pass different structures depending on the irq handler. For the abnormal 4088 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 4089 * user_interrupt entry 4090 */ 4091 free_irq(irq, ((relative_idx % 2) ? 4092 (void *) dec : 4093 (void *) &hdev->user_interrupt[dec->core_id])); 4094 } 4095 } 4096 4097 static int gaudi2_dec_enable_msix(struct hl_device *hdev) 4098 { 4099 int rc, i, irq_init_cnt, irq, relative_idx; 4100 struct hl_dec *dec; 4101 4102 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0; 4103 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM; 4104 i++, irq_init_cnt++) { 4105 4106 irq = pci_irq_vector(hdev->pdev, i); 4107 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 4108 4109 /* We pass different structures depending on the irq handler. For the abnormal 4110 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 4111 * user_interrupt entry 4112 * 4113 * TODO: change the dec abnrm to threaded irq 4114 */ 4115 4116 dec = hdev->dec + relative_idx / 2; 4117 if (relative_idx % 2) { 4118 rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0, 4119 gaudi2_irq_name(i), (void *) dec); 4120 } else { 4121 rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt, 4122 hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, 4123 gaudi2_irq_name(i), 4124 (void *) &hdev->user_interrupt[dec->core_id]); 4125 } 4126 4127 if (rc) { 4128 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4129 goto free_dec_irqs; 4130 } 4131 } 4132 4133 return 0; 4134 4135 free_dec_irqs: 4136 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt)); 4137 return rc; 4138 } 4139 4140 static int gaudi2_enable_msix(struct hl_device *hdev) 4141 { 4142 struct asic_fixed_properties *prop = &hdev->asic_prop; 4143 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4144 int rc, irq, i, j, user_irq_init_cnt; 4145 struct hl_cq *cq; 4146 4147 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX) 4148 return 0; 4149 4150 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES, 4151 PCI_IRQ_MSIX); 4152 if (rc < 0) { 4153 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n", 4154 GAUDI2_MSIX_ENTRIES, rc); 4155 return rc; 4156 } 4157 4158 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 4159 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 4160 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq); 4161 if (rc) { 4162 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4163 goto free_irq_vectors; 4164 } 4165 4166 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 4167 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE), 4168 &hdev->event_queue); 4169 if (rc) { 4170 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4171 goto free_completion_irq; 4172 } 4173 4174 rc = gaudi2_dec_enable_msix(hdev); 4175 if (rc) { 4176 dev_err(hdev->dev, "Failed to enable decoder IRQ"); 4177 goto free_event_irq; 4178 } 4179 4180 irq = 
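/*
 * Descriptive note: the remaining MSI-X vectors are registered below. The
 * TPC-assert and "unexpected error" interrupts reuse the generic
 * user-interrupt handlers, and the per-user completion interrupts are stored
 * in hdev->user_interrupt[] right after the decoder entries, which is why the
 * array index j starts at prop->user_dec_intr_count.
 */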
pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT); 4181 rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt, 4182 hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, 4183 gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt); 4184 if (rc) { 4185 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4186 goto free_dec_irq; 4187 } 4188 4189 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR); 4190 rc = request_irq(irq, hl_irq_handler_user_interrupt, 0, 4191 gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR), 4192 &hdev->unexpected_error_interrupt); 4193 if (rc) { 4194 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4195 goto free_tpc_irq; 4196 } 4197 4198 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0; 4199 user_irq_init_cnt < prop->user_interrupt_count; 4200 i++, j++, user_irq_init_cnt++) { 4201 4202 irq = pci_irq_vector(hdev->pdev, i); 4203 rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt, 4204 hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, 4205 gaudi2_irq_name(i), &hdev->user_interrupt[j]); 4206 4207 if (rc) { 4208 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4209 goto free_user_irq; 4210 } 4211 } 4212 4213 gaudi2->hw_cap_initialized |= HW_CAP_MSIX; 4214 4215 return 0; 4216 4217 free_user_irq: 4218 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count; 4219 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) { 4220 4221 irq = pci_irq_vector(hdev->pdev, i); 4222 free_irq(irq, &hdev->user_interrupt[j]); 4223 } 4224 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR); 4225 free_irq(irq, &hdev->unexpected_error_interrupt); 4226 free_tpc_irq: 4227 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT); 4228 free_irq(irq, &hdev->tpc_interrupt); 4229 free_dec_irq: 4230 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1); 4231 free_event_irq: 4232 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 4233 free_irq(irq, cq); 4234 4235 free_completion_irq: 4236 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 4237 free_irq(irq, cq); 4238 4239 free_irq_vectors: 4240 pci_free_irq_vectors(hdev->pdev); 4241 4242 return rc; 4243 } 4244 4245 static void gaudi2_sync_irqs(struct hl_device *hdev) 4246 { 4247 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4248 int i, j; 4249 int irq; 4250 4251 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 4252 return; 4253 4254 /* Wait for all pending IRQs to be finished */ 4255 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION)); 4256 4257 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) { 4258 irq = pci_irq_vector(hdev->pdev, i); 4259 synchronize_irq(irq); 4260 } 4261 4262 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT)); 4263 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR)); 4264 4265 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count; 4266 i++, j++) { 4267 irq = pci_irq_vector(hdev->pdev, i); 4268 synchronize_irq(irq); 4269 } 4270 4271 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE)); 4272 } 4273 4274 static void gaudi2_disable_msix(struct hl_device *hdev) 4275 { 4276 struct asic_fixed_properties *prop = &hdev->asic_prop; 4277 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4278 struct hl_cq *cq; 4279 int irq, i, j, k; 4280 4281 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 4282 return; 4283 4284 gaudi2_sync_irqs(hdev); 
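/*
 * Release the IRQs that were requested in gaudi2_enable_msix(): the event
 * queue, the decoder interrupts, the TPC-assert and "unexpected error"
 * interrupts, the user completion interrupts, and finally the completion
 * queue interrupt, before freeing the MSI-X vectors themselves.
 */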
4285 4286 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 4287 free_irq(irq, &hdev->event_queue); 4288 4289 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); 4290 4291 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT); 4292 free_irq(irq, &hdev->tpc_interrupt); 4293 4294 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR); 4295 free_irq(irq, &hdev->unexpected_error_interrupt); 4296 4297 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0; 4298 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) { 4299 4300 irq = pci_irq_vector(hdev->pdev, i); 4301 free_irq(irq, &hdev->user_interrupt[j]); 4302 } 4303 4304 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 4305 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 4306 free_irq(irq, cq); 4307 4308 pci_free_irq_vectors(hdev->pdev); 4309 4310 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX; 4311 } 4312 4313 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id) 4314 { 4315 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1); 4316 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK; 4317 u32 timeout_usec, dec_id, dec_bit, offset, graceful; 4318 int rc; 4319 4320 if (hdev->pldm) 4321 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC; 4322 else 4323 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC; 4324 4325 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 4326 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id; 4327 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4328 continue; 4329 4330 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET; 4331 4332 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0); 4333 4334 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val); 4335 4336 /* Wait till all traffic from decoder stops 4337 * before apply core reset. 4338 */ 4339 rc = hl_poll_timeout( 4340 hdev, 4341 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, 4342 graceful, 4343 (graceful & graceful_pend_mask), 4344 100, 4345 timeout_usec); 4346 if (rc) 4347 dev_err(hdev->dev, 4348 "Failed to stop traffic from DCORE%d Decoder %d\n", 4349 dcore_id, dec_id); 4350 } 4351 } 4352 4353 static void gaudi2_stop_pcie_dec(struct hl_device *hdev) 4354 { 4355 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1); 4356 u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK; 4357 u32 timeout_usec, dec_id, dec_bit, offset, graceful; 4358 int rc; 4359 4360 if (hdev->pldm) 4361 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC; 4362 else 4363 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC; 4364 4365 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 4366 dec_bit = PCIE_DEC_SHIFT + dec_id; 4367 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4368 continue; 4369 4370 offset = dec_id * PCIE_VDEC_OFFSET; 4371 4372 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0); 4373 4374 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val); 4375 4376 /* Wait till all traffic from decoder stops 4377 * before apply core reset. 
4378 */ 4379 rc = hl_poll_timeout( 4380 hdev, 4381 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, 4382 graceful, 4383 (graceful & graceful_pend_mask), 4384 100, 4385 timeout_usec); 4386 if (rc) 4387 dev_err(hdev->dev, 4388 "Failed to stop traffic from PCIe Decoder %d\n", 4389 dec_id); 4390 } 4391 } 4392 4393 static void gaudi2_stop_dec(struct hl_device *hdev) 4394 { 4395 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4396 int dcore_id; 4397 4398 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0) 4399 return; 4400 4401 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 4402 gaudi2_stop_dcore_dec(hdev, dcore_id); 4403 4404 gaudi2_stop_pcie_dec(hdev); 4405 } 4406 4407 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 4408 { 4409 u32 reg_base, reg_val; 4410 4411 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 4412 if (run_mode == HL_ENGINE_CORE_RUN) 4413 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1); 4414 else 4415 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); 4416 4417 WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val); 4418 } 4419 4420 static void gaudi2_halt_arcs(struct hl_device *hdev) 4421 { 4422 u16 arc_id; 4423 4424 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) { 4425 if (gaudi2_is_arc_enabled(hdev, arc_id)) 4426 gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT); 4427 } 4428 } 4429 4430 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 4431 { 4432 int rc; 4433 u32 reg_base, val, ack_mask, timeout_usec = 100000; 4434 4435 if (hdev->pldm) 4436 timeout_usec *= 100; 4437 4438 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 4439 if (run_mode == HL_ENGINE_CORE_RUN) 4440 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK; 4441 else 4442 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK; 4443 4444 rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET, 4445 val, ((val & ack_mask) == ack_mask), 4446 1000, timeout_usec); 4447 4448 if (!rc) { 4449 /* Clear */ 4450 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0); 4451 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val); 4452 } 4453 4454 return rc; 4455 } 4456 4457 static void gaudi2_reset_arcs(struct hl_device *hdev) 4458 { 4459 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4460 u16 arc_id; 4461 4462 if (!gaudi2) 4463 return; 4464 4465 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) 4466 if (gaudi2_is_arc_enabled(hdev, arc_id)) 4467 gaudi2_clr_arc_id_cap(hdev, arc_id); 4468 } 4469 4470 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev) 4471 { 4472 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4473 u32 queue_id; 4474 int i; 4475 4476 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 4477 return; 4478 4479 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 4480 4481 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 4482 if (!(hdev->nic_ports_mask & BIT(i))) 4483 continue; 4484 4485 gaudi2_qman_manual_flush_common(hdev, queue_id); 4486 } 4487 } 4488 4489 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids, 4490 u32 num_cores, u32 core_command) 4491 { 4492 int i, rc; 4493 4494 for (i = 0 ; i < num_cores ; i++) { 4495 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) 4496 gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command); 4497 } 4498 4499 for (i = 0 ; i < num_cores ; i++) { 4500 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) { 4501 rc = gaudi2_verify_arc_running_mode(hdev, 
core_ids[i], core_command); 4502 4503 if (rc) { 4504 dev_err(hdev->dev, "failed to %s arc: %d\n", 4505 (core_command == HL_ENGINE_CORE_HALT) ? 4506 "HALT" : "RUN", core_ids[i]); 4507 return -1; 4508 } 4509 } 4510 } 4511 4512 return 0; 4513 } 4514 4515 static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command) 4516 { 4517 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4518 u32 reg_base, reg_addr, reg_val, tpc_id; 4519 4520 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 4521 return 0; 4522 4523 tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id]; 4524 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id))) 4525 return 0; 4526 4527 reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id]; 4528 reg_addr = reg_base + TPC_CFG_STALL_OFFSET; 4529 reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK, 4530 (engine_command == HL_ENGINE_STALL) ? 1 : 0); 4531 WREG32(reg_addr, reg_val); 4532 4533 if (engine_command == HL_ENGINE_RESUME) { 4534 reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id]; 4535 reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET; 4536 RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK); 4537 } 4538 4539 return 0; 4540 } 4541 4542 static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command) 4543 { 4544 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4545 u32 reg_base, reg_addr, reg_val, mme_id; 4546 4547 mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id]; 4548 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id))) 4549 return 0; 4550 4551 reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id]; 4552 reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET; 4553 reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK, 4554 (engine_command == HL_ENGINE_STALL) ? 1 : 0); 4555 WREG32(reg_addr, reg_val); 4556 4557 return 0; 4558 } 4559 4560 static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command) 4561 { 4562 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4563 u32 reg_base, reg_addr, reg_val, edma_id; 4564 4565 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 4566 return 0; 4567 4568 edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id]; 4569 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id))) 4570 return 0; 4571 4572 reg_base = gaudi2_dma_core_blocks_bases[edma_id]; 4573 reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET; 4574 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 4575 (engine_command == HL_ENGINE_STALL) ? 1 : 0); 4576 WREG32(reg_addr, reg_val); 4577 4578 if (engine_command == HL_ENGINE_STALL) { 4579 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) | 4580 FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1); 4581 WREG32(reg_addr, reg_val); 4582 } 4583 4584 return 0; 4585 } 4586 4587 static int gaudi2_set_engine_modes(struct hl_device *hdev, 4588 u32 *engine_ids, u32 num_engines, u32 engine_command) 4589 { 4590 int i, rc; 4591 4592 for (i = 0 ; i < num_engines ; ++i) { 4593 switch (engine_ids[i]) { 4594 case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5: 4595 case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5: 4596 case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5: 4597 case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... 
GAUDI2_DCORE3_ENGINE_ID_TPC_5: 4598 rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command); 4599 if (rc) 4600 return rc; 4601 4602 break; 4603 case GAUDI2_DCORE0_ENGINE_ID_MME: 4604 case GAUDI2_DCORE1_ENGINE_ID_MME: 4605 case GAUDI2_DCORE2_ENGINE_ID_MME: 4606 case GAUDI2_DCORE3_ENGINE_ID_MME: 4607 rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command); 4608 if (rc) 4609 return rc; 4610 4611 break; 4612 case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1: 4613 case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1: 4614 case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1: 4615 case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1: 4616 rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command); 4617 if (rc) 4618 return rc; 4619 4620 break; 4621 default: 4622 dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]); 4623 return -EINVAL; 4624 } 4625 } 4626 4627 return 0; 4628 } 4629 4630 static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids, 4631 u32 num_engines, u32 engine_command) 4632 { 4633 switch (engine_command) { 4634 case HL_ENGINE_CORE_HALT: 4635 case HL_ENGINE_CORE_RUN: 4636 return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command); 4637 4638 case HL_ENGINE_STALL: 4639 case HL_ENGINE_RESUME: 4640 return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command); 4641 4642 default: 4643 dev_err(hdev->dev, "failed to execute command id %u\n", engine_command); 4644 return -EINVAL; 4645 } 4646 } 4647 4648 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4649 { 4650 u32 wait_timeout_ms; 4651 4652 if (hdev->pldm) 4653 wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC; 4654 else 4655 wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC; 4656 4657 if (fw_reset) 4658 goto skip_engines; 4659 4660 gaudi2_stop_dma_qmans(hdev); 4661 gaudi2_stop_mme_qmans(hdev); 4662 gaudi2_stop_tpc_qmans(hdev); 4663 gaudi2_stop_rot_qmans(hdev); 4664 gaudi2_stop_nic_qmans(hdev); 4665 msleep(wait_timeout_ms); 4666 4667 gaudi2_halt_arcs(hdev); 4668 gaudi2_dma_stall(hdev); 4669 gaudi2_mme_stall(hdev); 4670 gaudi2_tpc_stall(hdev); 4671 gaudi2_rotator_stall(hdev); 4672 4673 msleep(wait_timeout_ms); 4674 4675 gaudi2_stop_dec(hdev); 4676 4677 /* 4678 * in case of soft reset do a manual flush for QMANs (currently called 4679 * only for NIC QMANs 4680 */ 4681 if (!hard_reset) 4682 gaudi2_nic_qmans_manual_flush(hdev); 4683 4684 gaudi2_disable_dma_qmans(hdev); 4685 gaudi2_disable_mme_qmans(hdev); 4686 gaudi2_disable_tpc_qmans(hdev); 4687 gaudi2_disable_rot_qmans(hdev); 4688 gaudi2_disable_nic_qmans(hdev); 4689 gaudi2_disable_timestamp(hdev); 4690 4691 skip_engines: 4692 if (hard_reset) { 4693 gaudi2_disable_msix(hdev); 4694 return; 4695 } 4696 4697 gaudi2_sync_irqs(hdev); 4698 } 4699 4700 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev) 4701 { 4702 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 4703 4704 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 4705 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 4706 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 4707 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 4708 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 4709 pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC; 4710 } 4711 4712 static void gaudi2_init_firmware_loader(struct hl_device *hdev) 4713 { 4714 struct fw_load_mgr *fw_loader = 
&hdev->fw_loader; 4715 struct dynamic_fw_load_mgr *dynamic_loader; 4716 struct cpu_dyn_regs *dyn_regs; 4717 4718 /* fill common fields */ 4719 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 4720 fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE; 4721 fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE; 4722 fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC; 4723 fw_loader->skip_bmc = false; 4724 fw_loader->sram_bar_id = SRAM_CFG_BAR_ID; 4725 fw_loader->dram_bar_id = DRAM_BAR_ID; 4726 fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC; 4727 4728 /* here we update initial values for few specific dynamic regs (as 4729 * before reading the first descriptor from FW those value has to be 4730 * hard-coded). in later stages of the protocol those values will be 4731 * updated automatically by reading the FW descriptor so data there 4732 * will always be up-to-date 4733 */ 4734 dynamic_loader = &hdev->fw_loader.dynamic_loader; 4735 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; 4736 dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); 4737 dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); 4738 dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC; 4739 } 4740 4741 static int gaudi2_init_cpu(struct hl_device *hdev) 4742 { 4743 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4744 int rc; 4745 4746 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 4747 return 0; 4748 4749 if (gaudi2->hw_cap_initialized & HW_CAP_CPU) 4750 return 0; 4751 4752 rc = hl_fw_init_cpu(hdev); 4753 if (rc) 4754 return rc; 4755 4756 gaudi2->hw_cap_initialized |= HW_CAP_CPU; 4757 4758 return 0; 4759 } 4760 4761 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 4762 { 4763 struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 4764 struct asic_fixed_properties *prop = &hdev->asic_prop; 4765 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4766 struct cpu_dyn_regs *dyn_regs; 4767 struct hl_eq *eq; 4768 u32 status; 4769 int err; 4770 4771 if (!hdev->cpu_queues_enable) 4772 return 0; 4773 4774 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) 4775 return 0; 4776 4777 eq = &hdev->event_queue; 4778 4779 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4780 4781 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 4782 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 4783 4784 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 4785 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 4786 4787 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address)); 4788 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address)); 4789 4790 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 4791 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 4792 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 4793 4794 /* Used for EQ CI */ 4795 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 4796 4797 WREG32(mmCPU_IF_PF_PQ_PI, 0); 4798 4799 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); 4800 4801 /* Let the ARC know we are ready as it is now handling those queues */ 4802 4803 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq), 4804 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id); 4805 4806 err = hl_poll_timeout( 4807 hdev, 4808 mmCPU_IF_QUEUE_INIT, 4809 status, 4810 (status == PQ_INIT_STATUS_READY_FOR_HOST), 4811 1000, 4812 cpu_timeout); 4813 4814 if (err) { 4815 dev_err(hdev->dev, 
"Failed to communicate with device CPU (timeout)\n"); 4816 return -EIO; 4817 } 4818 4819 /* update FW application security bits */ 4820 if (prop->fw_cpu_boot_dev_sts0_valid) 4821 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 4822 4823 if (prop->fw_cpu_boot_dev_sts1_valid) 4824 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 4825 4826 gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q; 4827 return 0; 4828 } 4829 4830 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base, 4831 u32 queue_id_base) 4832 { 4833 struct hl_hw_queue *q; 4834 u32 pq_id, pq_offset; 4835 4836 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4837 q = &hdev->kernel_queues[queue_id_base + pq_id]; 4838 pq_offset = pq_id * 4; 4839 4840 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, 4841 lower_32_bits(q->bus_address)); 4842 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset, 4843 upper_32_bits(q->bus_address)); 4844 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH)); 4845 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0); 4846 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0); 4847 } 4848 } 4849 4850 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base) 4851 { 4852 u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi; 4853 4854 mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4855 mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4856 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4857 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4858 4859 for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) { 4860 cp_offset = cp_id * 4; 4861 4862 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo); 4863 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi); 4864 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo); 4865 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi); 4866 } 4867 4868 /* allow QMANs to accept work from ARC CQF */ 4869 WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1)); 4870 } 4871 4872 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base, 4873 u32 queue_id_base) 4874 { 4875 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4876 u32 pq_id, pq_offset, so_base_lo, so_base_hi; 4877 4878 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4879 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4880 4881 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4882 pq_offset = pq_id * 4; 4883 4884 /* Configure QMAN HBW to scratchpad as it is not needed */ 4885 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset, 4886 lower_32_bits(gaudi2->scratchpad_bus_address)); 4887 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset, 4888 upper_32_bits(gaudi2->scratchpad_bus_address)); 4889 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset, 4890 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry))); 4891 4892 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0); 4893 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA); 4894 WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo); 4895 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi); 4896 } 4897 4898 /* Enable QMAN H/W completion */ 4899 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << 
PDMA0_QM_PQC_CFG_EN_SHIFT); 4900 } 4901 4902 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base) 4903 { 4904 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4905 u32 sp_reg_addr; 4906 4907 switch (queue_id_base) { 4908 case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3: 4909 fallthrough; 4910 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 4911 fallthrough; 4912 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 4913 fallthrough; 4914 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 4915 fallthrough; 4916 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 4917 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 4918 break; 4919 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 4920 fallthrough; 4921 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 4922 fallthrough; 4923 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 4924 fallthrough; 4925 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 4926 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 4927 break; 4928 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 4929 fallthrough; 4930 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 4931 fallthrough; 4932 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 4933 fallthrough; 4934 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 4935 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 4936 break; 4937 case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3: 4938 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl); 4939 break; 4940 case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3: 4941 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 4942 break; 4943 default: 4944 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base); 4945 return 0; 4946 } 4947 4948 return sp_reg_addr; 4949 } 4950 4951 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base, 4952 u32 queue_id_base) 4953 { 4954 u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset; 4955 int map_table_entry; 4956 4957 WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot); 4958 4959 irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base); 4960 WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset)); 4961 WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset)); 4962 4963 map_table_entry = gaudi2_qman_async_event_id[queue_id_base]; 4964 WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET, 4965 gaudi2_irq_map_table[map_table_entry].cpu_id); 4966 4967 WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK); 4968 4969 WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT); 4970 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0); 4971 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0); 4972 4973 /* Enable the QMAN channel. 4974 * PDMA QMAN configuration is different, as we do not allow user to 4975 * access some of the CPs. 4976 * PDMA0: CP2/3 are reserved for the ARC usage. 4977 * PDMA1: CP1/2/3 are reserved for the ARC usage. 
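	 * The CFG0 write below therefore selects a per-PDMA enable mask (PDMA0_QMAN_ENABLE /
	 * PDMA1_QMAN_ENABLE) for the two PDMA QMANs, while every other QMAN is enabled with
	 * the full QMAN_ENABLE value.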
4978 */ 4979 if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]) 4980 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE); 4981 else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]) 4982 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE); 4983 else 4984 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE); 4985 } 4986 4987 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base, 4988 u32 queue_id_base) 4989 { 4990 u32 pq_id; 4991 4992 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) 4993 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION; 4994 4995 gaudi2_init_qman_pq(hdev, reg_base, queue_id_base); 4996 gaudi2_init_qman_cp(hdev, reg_base); 4997 gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base); 4998 gaudi2_init_qman_common(hdev, reg_base, queue_id_base); 4999 } 5000 5001 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base, 5002 u32 dma_core_id, bool is_secure) 5003 { 5004 u32 prot, irq_handler_offset; 5005 struct cpu_dyn_regs *dyn_regs; 5006 int map_table_entry; 5007 5008 prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT; 5009 if (is_secure) 5010 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT; 5011 5012 WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot); 5013 5014 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 5015 irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 5016 5017 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET, 5018 lower_32_bits(CFG_BASE + irq_handler_offset)); 5019 5020 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET, 5021 upper_32_bits(CFG_BASE + irq_handler_offset)); 5022 5023 map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id]; 5024 WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET, 5025 gaudi2_irq_map_table[map_table_entry].cpu_id); 5026 5027 /* Enable the DMA channel */ 5028 WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT); 5029 } 5030 5031 static void gaudi2_init_kdma(struct hl_device *hdev) 5032 { 5033 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5034 u32 reg_base; 5035 5036 if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA) 5037 return; 5038 5039 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA]; 5040 5041 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true); 5042 5043 gaudi2->hw_cap_initialized |= HW_CAP_KDMA; 5044 } 5045 5046 static void gaudi2_init_pdma(struct hl_device *hdev) 5047 { 5048 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5049 u32 reg_base; 5050 5051 if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK) 5052 return; 5053 5054 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0]; 5055 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false); 5056 5057 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]; 5058 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0); 5059 5060 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1]; 5061 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false); 5062 5063 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]; 5064 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0); 5065 5066 gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK; 5067 } 5068 5069 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq) 5070 { 5071 u32 reg_base, base_edma_core_id, base_edma_qman_id; 5072 5073 base_edma_core_id = DMA_CORE_ID_EDMA0 + seq; 5074 base_edma_qman_id = edma_stream_base[seq]; 5075 5076 reg_base = 
gaudi2_dma_core_blocks_bases[base_edma_core_id]; 5077 gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false); 5078 5079 reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id]; 5080 gaudi2_init_qman(hdev, reg_base, base_edma_qman_id); 5081 } 5082 5083 static void gaudi2_init_edma(struct hl_device *hdev) 5084 { 5085 struct asic_fixed_properties *prop = &hdev->asic_prop; 5086 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5087 int dcore, inst; 5088 5089 if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK) 5090 return; 5091 5092 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 5093 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 5094 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 5095 5096 if (!(prop->edma_enabled_mask & BIT(seq))) 5097 continue; 5098 5099 gaudi2_init_edma_instance(hdev, seq); 5100 5101 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq); 5102 } 5103 } 5104 } 5105 5106 /* 5107 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell. 5108 * @hdev: pointer to habanalabs device structure. 5109 * @sob_id: sync object ID. 5110 * @first_mon_id: ID of first monitor out of 3 consecutive monitors. 5111 * @interrupt_id: interrupt ID. 5112 * 5113 * Some initiators cannot have HBW address in their completion address registers, and thus cannot 5114 * write directly to the HBW host memory of the virtual MSI-X doorbell. 5115 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write. 5116 * 5117 * The mechanism in the sync manager block is composed of a master monitor with 3 messages. 5118 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next 5119 * completion, by decrementing the sync object value and re-arming the monitor. 5120 */ 5121 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id, 5122 u32 first_mon_id, u32 interrupt_id) 5123 { 5124 u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config; 5125 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5126 u64 addr; 5127 u8 mask; 5128 5129 /* Reset the SOB value */ 5130 sob_offset = sob_id * sizeof(u32); 5131 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 5132 5133 /* Configure 3 monitors: 5134 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor) 5135 * 2. Decrement SOB value by 1. 5136 * 3. Re-arm the master monitor. 
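	 *
	 * They are programmed below in reverse order: the two helper monitors (the SOB
	 * decrement and the re-arm) are fully configured first, and the master monitor is
	 * configured and armed last, so it cannot fire before its payload and ARM data are
	 * in place.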
5137 */ 5138 5139 first_mon_offset = first_mon_id * sizeof(u32); 5140 5141 /* 2nd monitor: Decrement SOB value by 1 */ 5142 mon_offset = first_mon_offset + sizeof(u32); 5143 5144 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 5145 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 5146 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 5147 5148 payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */ 5149 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) | 5150 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1); 5151 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 5152 5153 /* 3rd monitor: Re-arm the master monitor */ 5154 mon_offset = first_mon_offset + 2 * sizeof(u32); 5155 5156 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset; 5157 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 5158 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 5159 5160 sob_group = sob_id / 8; 5161 mask = ~BIT(sob_id & 0x7); 5162 mode = 0; /* comparison mode is "greater than or equal to" */ 5163 arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) | 5164 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) | 5165 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) | 5166 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1); 5167 5168 payload = arm; 5169 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 5170 5171 /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */ 5172 mon_offset = first_mon_offset; 5173 5174 config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */ 5175 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config); 5176 5177 addr = gaudi2->virt_msix_db_dma_addr; 5178 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 5179 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 5180 5181 payload = interrupt_id; 5182 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 5183 5184 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm); 5185 } 5186 5187 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev) 5188 { 5189 u32 decoder_id, sob_id, first_mon_id, interrupt_id; 5190 struct asic_fixed_properties *prop = &hdev->asic_prop; 5191 5192 /* Decoder normal/abnormal interrupts */ 5193 for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) { 5194 if (!(prop->decoder_enabled_mask & BIT(decoder_id))) 5195 continue; 5196 5197 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 5198 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id; 5199 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id; 5200 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 5201 5202 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 5203 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id; 5204 interrupt_id += 1; 5205 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 5206 } 5207 } 5208 5209 static void gaudi2_init_sm(struct hl_device *hdev) 5210 { 5211 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5212 u64 cq_address; 5213 u32 reg_val; 5214 int i; 5215 5216 /* Enable HBW/LBW CQ for completion monitors */ 5217 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 5218 
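	/* Enabling the LBW path as well lets the same completion monitors also hit the
	 * virtual MSI-X doorbell that is configured just below.
	 */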
reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1); 5219 5220 for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++) 5221 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 5222 5223 /* Enable only HBW CQ for KDMA completion monitor */ 5224 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 5225 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 5226 5227 /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */ 5228 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr)); 5229 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr)); 5230 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION); 5231 5232 for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) { 5233 cq_address = 5234 hdev->completion_queue[i].bus_address; 5235 5236 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i), 5237 lower_32_bits(cq_address)); 5238 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i), 5239 upper_32_bits(cq_address)); 5240 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i), 5241 ilog2(HL_CQ_SIZE_IN_BYTES)); 5242 } 5243 5244 /* Configure kernel ASID and MMU BP*/ 5245 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000); 5246 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0); 5247 5248 /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */ 5249 gaudi2_prepare_sm_for_virt_msix_db(hdev); 5250 } 5251 5252 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base) 5253 { 5254 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5255 u32 reg_val; 5256 int i; 5257 5258 reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0); 5259 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1); 5260 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1); 5261 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1); 5262 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1); 5263 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1); 5264 5265 WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val); 5266 WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF); 5267 5268 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) { 5269 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i); 5270 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]); 5271 } 5272 } 5273 5274 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id, 5275 bool config_qman_only) 5276 { 5277 u32 queue_id_base, reg_base; 5278 5279 switch (dcore_id) { 5280 case 0: 5281 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 5282 break; 5283 case 1: 5284 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 5285 break; 5286 case 2: 5287 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 5288 break; 5289 case 3: 5290 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 5291 break; 5292 default: 5293 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id); 5294 return; 5295 } 5296 5297 if (!config_qman_only) { 5298 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id]; 5299 gaudi2_init_mme_acc(hdev, reg_base); 5300 } 5301 5302 reg_base = gaudi2_qm_blocks_bases[queue_id_base]; 5303 gaudi2_init_qman(hdev, reg_base, queue_id_base); 5304 } 5305 5306 static void gaudi2_init_mme(struct hl_device *hdev) 5307 { 5308 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5309 int i; 5310 5311 if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK) 5312 return; 5313 5314 for (i = 0 ; i < 
NUM_OF_DCORES ; i++) { 5315 gaudi2_init_dcore_mme(hdev, i, false); 5316 5317 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i); 5318 } 5319 } 5320 5321 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base) 5322 { 5323 /* Mask arithmetic and QM interrupts in TPC */ 5324 WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE); 5325 5326 /* Set 16 cache lines */ 5327 WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET, 5328 2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT); 5329 } 5330 5331 struct gaudi2_tpc_init_cfg_data { 5332 enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES]; 5333 }; 5334 5335 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst, 5336 u32 offset, struct iterate_module_ctx *ctx) 5337 { 5338 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5339 struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data; 5340 u32 queue_id_base; 5341 u8 seq; 5342 5343 queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN); 5344 5345 if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1)) 5346 /* gets last sequence number */ 5347 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE; 5348 else 5349 seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 5350 5351 gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset); 5352 gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base); 5353 5354 gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq); 5355 } 5356 5357 static void gaudi2_init_tpc(struct hl_device *hdev) 5358 { 5359 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5360 struct gaudi2_tpc_init_cfg_data init_cfg_data; 5361 struct iterate_module_ctx tpc_iter; 5362 5363 if (!hdev->asic_prop.tpc_enabled_mask) 5364 return; 5365 5366 if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK) 5367 return; 5368 5369 init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0; 5370 init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0; 5371 init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0; 5372 init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0; 5373 tpc_iter.fn = &gaudi2_init_tpc_config; 5374 tpc_iter.data = &init_cfg_data; 5375 gaudi2_iterate_tpcs(hdev, &tpc_iter); 5376 } 5377 5378 static void gaudi2_init_rotator(struct hl_device *hdev) 5379 { 5380 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5381 u32 i, reg_base, queue_id; 5382 5383 queue_id = GAUDI2_QUEUE_ID_ROT_0_0; 5384 5385 for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 5386 reg_base = gaudi2_qm_blocks_bases[queue_id]; 5387 gaudi2_init_qman(hdev, reg_base, queue_id); 5388 5389 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i); 5390 } 5391 } 5392 5393 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id) 5394 { 5395 u32 sob_id; 5396 5397 /* VCMD normal interrupt */ 5398 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 5399 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, 5400 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 5401 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 5402 5403 /* VCMD abnormal interrupt */ 5404 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 5405 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, 5406 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 5407 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 5408 } 5409 5410 static void gaudi2_init_dec(struct hl_device *hdev) 5411 
{ 5412 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5413 u32 dcore_id, dec_id, dec_bit; 5414 u64 base_addr; 5415 5416 if (!hdev->asic_prop.decoder_enabled_mask) 5417 return; 5418 5419 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK) 5420 return; 5421 5422 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 5423 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 5424 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id; 5425 5426 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 5427 continue; 5428 5429 base_addr = mmDCORE0_DEC0_CMD_BASE + 5430 BRDG_CTRL_BLOCK_OFFSET + 5431 dcore_id * DCORE_OFFSET + 5432 dec_id * DCORE_VDEC_OFFSET; 5433 5434 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 5435 5436 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 5437 } 5438 5439 for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) { 5440 dec_bit = PCIE_DEC_SHIFT + dec_id; 5441 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 5442 continue; 5443 5444 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET + 5445 dec_id * DCORE_VDEC_OFFSET; 5446 5447 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 5448 5449 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 5450 } 5451 } 5452 5453 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev, 5454 u32 stlb_base, u32 asid, u64 phys_addr) 5455 { 5456 u32 status, timeout_usec; 5457 int rc; 5458 5459 if (hdev->pldm || !hdev->pdev) 5460 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 5461 else 5462 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 5463 5464 WREG32(stlb_base + STLB_ASID_OFFSET, asid); 5465 WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 5466 WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT); 5467 WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000); 5468 5469 rc = hl_poll_timeout( 5470 hdev, 5471 stlb_base + STLB_BUSY_OFFSET, 5472 status, 5473 !(status & 0x80000000), 5474 1000, 5475 timeout_usec); 5476 5477 if (rc) { 5478 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid); 5479 return rc; 5480 } 5481 5482 return 0; 5483 } 5484 5485 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base, 5486 u32 start_offset, u32 inv_start_val, 5487 u32 flags) 5488 { 5489 /* clear PMMU mem line cache (only needed in mmu range invalidation) */ 5490 if (flags & MMU_OP_CLEAR_MEMCACHE) 5491 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1); 5492 5493 if (flags & MMU_OP_SKIP_LOW_CACHE_INV) 5494 return; 5495 5496 WREG32(stlb_base + start_offset, inv_start_val); 5497 } 5498 5499 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base, 5500 struct gaudi2_cache_invld_params *inv_params) 5501 { 5502 u32 status, timeout_usec, start_offset; 5503 int rc; 5504 5505 timeout_usec = (hdev->pldm) ? 
					GAUDI2_PLDM_MMU_TIMEOUT_USEC : GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;

	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
		rc = hl_poll_timeout(
			hdev,
			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
			status,
			status & 0x1,
			1000,
			timeout_usec);

		if (rc)
			return rc;

		/* Need to manually reset the status to 0 */
		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
	}

	/* The lower cache does not work with cache lines, hence we can skip its
	 * invalidation upon map and invalidate it only upon unmap.
	 */
	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
		return 0;

	start_offset = inv_params->range_invalidation ?
			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;

	rc = hl_poll_timeout(
		hdev,
		stlb_base + start_offset,
		status,
		!(status & 0x1),
		1000,
		timeout_usec);

	return rc;
}

bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 hw_cap;

	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);

	if (gaudi2->hw_cap_initialized & hw_cap)
		return true;

	return false;
}

/* this function shall be called only for HMMUs for which the capability bit is set */
static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
{
	u32 offset;

	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
}

static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 start_offset;

	if (inv_params->range_invalidation) {
		/* Set the address range.
		 * Note that the start address written to the register is, by design,
		 * not included in the invalidation range.
		 * That is why we program an address one lower than the first address
		 * we actually want to be included in the range invalidation.
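		 *
		 * For example (illustrative values only): invalidating [0x1000, 0x3000)
		 * programs start = 0xFFF and end = 0x3000, each split between the LSB and
		 * MSB registers below using the 12-bit and 44-bit shifts.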
5578 */ 5579 u64 start = inv_params->start_va - 1; 5580 5581 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET; 5582 5583 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET, 5584 start >> MMU_RANGE_INV_VA_LSB_SHIFT); 5585 5586 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET, 5587 start >> MMU_RANGE_INV_VA_MSB_SHIFT); 5588 5589 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET, 5590 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT); 5591 5592 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET, 5593 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT); 5594 } else { 5595 start_offset = STLB_INV_ALL_START_OFFSET; 5596 } 5597 5598 gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset, 5599 inv_params->inv_start_val, inv_params->flags); 5600 } 5601 5602 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev, 5603 int dcore_id, int hmmu_id, 5604 struct gaudi2_cache_invld_params *inv_params) 5605 { 5606 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 5607 5608 gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params); 5609 } 5610 5611 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev, 5612 int dcore_id, int hmmu_id, 5613 struct gaudi2_cache_invld_params *inv_params) 5614 { 5615 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 5616 5617 return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params); 5618 } 5619 5620 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev, 5621 struct gaudi2_cache_invld_params *inv_params) 5622 { 5623 int dcore_id, hmmu_id; 5624 5625 /* first send all invalidation commands */ 5626 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 5627 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 5628 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 5629 continue; 5630 5631 gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params); 5632 } 5633 } 5634 5635 /* next, poll all invalidations status */ 5636 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 5637 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 5638 int rc; 5639 5640 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 5641 continue; 5642 5643 rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id, 5644 inv_params); 5645 if (rc) 5646 return rc; 5647 } 5648 } 5649 5650 return 0; 5651 } 5652 5653 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 5654 { 5655 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5656 struct gaudi2_cache_invld_params invld_params; 5657 int rc = 0; 5658 5659 if (hdev->reset_info.hard_reset_pending) 5660 return rc; 5661 5662 invld_params.range_invalidation = false; 5663 invld_params.inv_start_val = 1; 5664 5665 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 5666 invld_params.flags = flags; 5667 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 5668 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 5669 &invld_params); 5670 } else if (flags & MMU_OP_PHYS_PACK) { 5671 invld_params.flags = 0; 5672 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 5673 } 5674 5675 return rc; 5676 } 5677 5678 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, 5679 u32 flags, u32 asid, u64 va, u64 size) 5680 { 5681 struct gaudi2_cache_invld_params invld_params = {0}; 5682 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5683 u64 start_va, end_va; 5684 u32 
inv_start_val; 5685 int rc = 0; 5686 5687 if (hdev->reset_info.hard_reset_pending) 5688 return 0; 5689 5690 inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT | 5691 1 << MMU_RANGE_INV_ASID_EN_SHIFT | 5692 asid << MMU_RANGE_INV_ASID_SHIFT); 5693 start_va = va; 5694 end_va = start_va + size; 5695 5696 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 5697 /* As range invalidation does not support zero address we will 5698 * do full invalidation in this case 5699 */ 5700 if (start_va) { 5701 invld_params.range_invalidation = true; 5702 invld_params.start_va = start_va; 5703 invld_params.end_va = end_va; 5704 invld_params.inv_start_val = inv_start_val; 5705 invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE; 5706 } else { 5707 invld_params.range_invalidation = false; 5708 invld_params.inv_start_val = 1; 5709 invld_params.flags = flags; 5710 } 5711 5712 5713 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 5714 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 5715 &invld_params); 5716 if (rc) 5717 return rc; 5718 5719 } else if (flags & MMU_OP_PHYS_PACK) { 5720 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va); 5721 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va); 5722 invld_params.inv_start_val = inv_start_val; 5723 invld_params.flags = flags; 5724 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 5725 } 5726 5727 return rc; 5728 } 5729 5730 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) 5731 { 5732 struct asic_fixed_properties *prop = &hdev->asic_prop; 5733 u64 hop0_addr; 5734 u32 asid, max_asid = prop->max_asid; 5735 int rc; 5736 5737 /* it takes too much time to init all of the ASIDs on palladium */ 5738 if (hdev->pldm) 5739 max_asid = min((u32) 8, max_asid); 5740 5741 for (asid = 0 ; asid < max_asid ; asid++) { 5742 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr; 5743 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr); 5744 if (rc) { 5745 dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid); 5746 return rc; 5747 } 5748 } 5749 5750 return 0; 5751 } 5752 5753 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base) 5754 { 5755 u32 status, timeout_usec; 5756 int rc; 5757 5758 if (hdev->pldm || !hdev->pdev) 5759 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 5760 else 5761 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC; 5762 5763 WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1); 5764 5765 rc = hl_poll_timeout( 5766 hdev, 5767 stlb_base + STLB_SRAM_INIT_OFFSET, 5768 status, 5769 !status, 5770 1000, 5771 timeout_usec); 5772 5773 if (rc) 5774 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n"); 5775 5776 rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base); 5777 if (rc) 5778 return rc; 5779 5780 WREG32(mmu_base + MMU_BYPASS_OFFSET, 0); 5781 5782 rc = hl_poll_timeout( 5783 hdev, 5784 stlb_base + STLB_INV_ALL_START_OFFSET, 5785 status, 5786 !status, 5787 1000, 5788 timeout_usec); 5789 5790 if (rc) 5791 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n"); 5792 5793 WREG32(mmu_base + MMU_ENABLE_OFFSET, 1); 5794 5795 return rc; 5796 } 5797 5798 static int gaudi2_pci_mmu_init(struct hl_device *hdev) 5799 { 5800 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5801 u32 mmu_base, stlb_base; 5802 int rc; 5803 5804 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) 5805 return 0; 5806 5807 mmu_base = mmPMMU_HBW_MMU_BASE; 5808 stlb_base = 
mmPMMU_HBW_STLB_BASE; 5809 5810 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5811 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) | 5812 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) | 5813 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) | 5814 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) | 5815 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT), 5816 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5817 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5818 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5819 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5820 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5821 5822 WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0); 5823 5824 if (PAGE_SIZE == SZ_64K) { 5825 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */ 5826 RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5827 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) | 5828 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) | 5829 FIELD_PREP( 5830 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK, 5831 1), 5832 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK | 5833 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK | 5834 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK); 5835 } 5836 5837 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK); 5838 5839 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5840 if (rc) 5841 return rc; 5842 5843 gaudi2->hw_cap_initialized |= HW_CAP_PMMU; 5844 5845 return 0; 5846 } 5847 5848 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id, 5849 int hmmu_id) 5850 { 5851 struct asic_fixed_properties *prop = &hdev->asic_prop; 5852 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5853 u32 offset, mmu_base, stlb_base, hw_cap; 5854 u8 dmmu_seq; 5855 int rc; 5856 5857 dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id; 5858 hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq; 5859 5860 /* 5861 * return if DMMU is already initialized or if it's not out of 5862 * isolation (due to cluster binning) 5863 */ 5864 if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq))) 5865 return 0; 5866 5867 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET); 5868 mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset; 5869 stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset; 5870 5871 RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */, 5872 MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK); 5873 5874 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5875 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) | 5876 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) | 5877 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) | 5878 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) | 5879 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3), 5880 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5881 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5882 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5883 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5884 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5885 5886 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1, 
5887 STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK); 5888 5889 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK); 5890 5891 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5892 if (rc) 5893 return rc; 5894 5895 gaudi2->hw_cap_initialized |= hw_cap; 5896 5897 return 0; 5898 } 5899 5900 static int gaudi2_hbm_mmu_init(struct hl_device *hdev) 5901 { 5902 int rc, dcore_id, hmmu_id; 5903 5904 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 5905 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) { 5906 rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id); 5907 if (rc) 5908 return rc; 5909 } 5910 5911 return 0; 5912 } 5913 5914 static int gaudi2_mmu_init(struct hl_device *hdev) 5915 { 5916 int rc; 5917 5918 rc = gaudi2_pci_mmu_init(hdev); 5919 if (rc) 5920 return rc; 5921 5922 rc = gaudi2_hbm_mmu_init(hdev); 5923 if (rc) 5924 return rc; 5925 5926 return 0; 5927 } 5928 5929 static int gaudi2_hw_init(struct hl_device *hdev) 5930 { 5931 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5932 int rc; 5933 5934 /* Let's mark in the H/W that we have reached this point. We check 5935 * this value in the reset_before_init function to understand whether 5936 * we need to reset the chip before doing H/W init. This register is 5937 * cleared by the H/W upon H/W reset 5938 */ 5939 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 5940 5941 /* Perform read from the device to make sure device is up */ 5942 RREG32(mmHW_STATE); 5943 5944 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 5945 * So we set it here and if anyone tries to move it later to 5946 * a different address, there will be an error 5947 */ 5948 if (hdev->asic_prop.iatu_done_by_fw) 5949 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE; 5950 5951 /* 5952 * Before pushing u-boot/linux to device, need to set the hbm bar to 5953 * base address of dram 5954 */ 5955 if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 5956 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n"); 5957 return -EIO; 5958 } 5959 5960 rc = gaudi2_init_cpu(hdev); 5961 if (rc) { 5962 dev_err(hdev->dev, "failed to initialize CPU\n"); 5963 return rc; 5964 } 5965 5966 gaudi2_init_scrambler_hbm(hdev); 5967 gaudi2_init_kdma(hdev); 5968 5969 rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC); 5970 if (rc) { 5971 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc); 5972 return rc; 5973 } 5974 5975 rc = gaudi2->cpucp_info_get(hdev); 5976 if (rc) { 5977 dev_err(hdev->dev, "Failed to get cpucp info\n"); 5978 return rc; 5979 } 5980 5981 rc = gaudi2_mmu_init(hdev); 5982 if (rc) 5983 return rc; 5984 5985 gaudi2_init_pdma(hdev); 5986 gaudi2_init_edma(hdev); 5987 gaudi2_init_sm(hdev); 5988 gaudi2_init_tpc(hdev); 5989 gaudi2_init_mme(hdev); 5990 gaudi2_init_rotator(hdev); 5991 gaudi2_init_dec(hdev); 5992 gaudi2_enable_timestamp(hdev); 5993 5994 rc = gaudi2_coresight_init(hdev); 5995 if (rc) 5996 goto disable_queues; 5997 5998 rc = gaudi2_enable_msix(hdev); 5999 if (rc) 6000 goto disable_queues; 6001 6002 /* Perform read from the device to flush all configuration */ 6003 RREG32(mmHW_STATE); 6004 6005 return 0; 6006 6007 disable_queues: 6008 gaudi2_disable_dma_qmans(hdev); 6009 gaudi2_disable_mme_qmans(hdev); 6010 gaudi2_disable_tpc_qmans(hdev); 6011 gaudi2_disable_rot_qmans(hdev); 6012 gaudi2_disable_nic_qmans(hdev); 6013 6014 gaudi2_disable_timestamp(hdev); 6015 6016 return rc; 6017 } 6018 6019 /** 6020 * gaudi2_send_hard_reset_cmd - common function to handle reset 6021 * 6022 * 
 * @hdev: pointer to the habanalabs device structure
 *
 * This function handles the various possible reset scenarios.
 * It considers whether the reset is handled by the driver or by the FW, and which
 * FW components are loaded.
 */
static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	bool heartbeat_reset, preboot_only, cpu_initialized = false;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 cpu_boot_status;

	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);

	/*
	 * Handle the corner case where the failure happened while loading the CPU
	 * management app, and the driver did not detect any failure while loading
	 * the FW. In such a scenario the driver will send only HALT_MACHINE, and
	 * no one will respond to this request since the FW is already back in
	 * preboot and cannot handle such a command.
	 * In this case, the next time the management app loads it will check the
	 * events register, which will still have the halt indication, and will
	 * reboot the device.
	 * The solution is to let preboot clear all relevant registers before the
	 * next boot, once the driver sends COMMS_RST_DEV.
	 */
	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);

	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
		cpu_initialized = true;

	/*
	 * When Linux/Bootfit exists, this write to the SP can be interpreted in 2 ways:
	 * 1. FW reset: FW initiates the reset sequence
	 * 2. driver reset: FW will start the HALT sequence (the preparations for the
	 *                  reset but not the reset itself, as it is not implemented
	 *                  on their side) and LKD will wait to let FW complete the
	 *                  sequence before issuing the reset
	 */
	if (!preboot_only && cpu_initialized) {
		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);

		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
	}

	/*
	 * When working with preboot (without Linux/Boot fit) we can
	 * communicate only using the COMMS commands to issue halt/reset.
	 *
	 * For the case in which we are working with Linux/Bootfit, this is a hail-mary
	 * attempt to revive the card in the small chance that the f/w has
	 * experienced a watchdog event, which caused it to return to preboot.
	 * In that case, triggering reset through GIC won't help. We need to
	 * trigger the reset as if Linux wasn't loaded.
	 *
	 * We do it only if the reset cause was HB, because that would be the
	 * indication of such an event.
	 *
	 * In case the watchdog hasn't expired but we still got HB, then this won't
	 * do any damage.
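	 *
	 * To summarize the decision: the GIC HALT_MACHINE request above is sent only when
	 * a CPU image beyond preboot is up, while the COMMS halt/reset request below is
	 * used when running from preboot only, when the CPU never came up, or when the
	 * reset was triggered by a heartbeat failure.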
6084 */ 6085 6086 if (heartbeat_reset || preboot_only || !cpu_initialized) { 6087 if (hdev->asic_prop.hard_reset_done_by_fw) 6088 hl_fw_ask_hard_reset_without_linux(hdev); 6089 else 6090 hl_fw_ask_halt_machine_without_linux(hdev); 6091 } 6092 } 6093 6094 /** 6095 * gaudi2_execute_hard_reset - execute hard reset by driver/FW 6096 * 6097 * @hdev: pointer to the habanalabs device structure 6098 * 6099 * This function executes hard reset based on if driver/FW should do the reset 6100 */ 6101 static void gaudi2_execute_hard_reset(struct hl_device *hdev) 6102 { 6103 if (hdev->asic_prop.hard_reset_done_by_fw) { 6104 gaudi2_send_hard_reset_cmd(hdev); 6105 return; 6106 } 6107 6108 /* Set device to handle FLR by H/W as we will put the device 6109 * CPU to halt mode 6110 */ 6111 WREG32(mmPCIE_AUX_FLR_CTRL, 6112 (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 6113 6114 gaudi2_send_hard_reset_cmd(hdev); 6115 6116 WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1); 6117 } 6118 6119 static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us) 6120 { 6121 int i, rc = 0; 6122 u32 reg_val; 6123 6124 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++) 6125 rc = hl_poll_timeout( 6126 hdev, 6127 mmCPU_RST_STATUS_TO_HOST, 6128 reg_val, 6129 reg_val == CPU_RST_STATUS_SOFT_RST_DONE, 6130 1000, 6131 poll_timeout_us); 6132 6133 if (rc) 6134 dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n", 6135 reg_val); 6136 return rc; 6137 } 6138 6139 /** 6140 * gaudi2_execute_soft_reset - execute soft reset by driver/FW 6141 * 6142 * @hdev: pointer to the habanalabs device structure 6143 * @driver_performs_reset: true if driver should perform reset instead of f/w. 6144 * @poll_timeout_us: time to wait for response from f/w. 6145 * 6146 * This function executes soft reset based on if driver/FW should do the reset 6147 */ 6148 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset, 6149 u32 poll_timeout_us) 6150 { 6151 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 6152 int rc = 0; 6153 6154 if (!driver_performs_reset) { 6155 if (hl_is_fw_sw_ver_below(hdev, 1, 10)) { 6156 /* set SP to indicate reset request sent to FW */ 6157 if (dyn_regs->cpu_rst_status) 6158 WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); 6159 else 6160 WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); 6161 WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), 6162 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); 6163 6164 /* wait for f/w response */ 6165 rc = gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us); 6166 } else { 6167 rc = hl_fw_send_soft_reset(hdev); 6168 } 6169 return rc; 6170 } 6171 6172 /* Block access to engines, QMANs and SM during reset, these 6173 * RRs will be reconfigured after soft reset. 6174 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset. 
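 * The two long-range RRs programmed right below cover the LBW address space on
 * either side of the PCIE_MSIX block (roughly DCORE0_TPC0_QM_DCCM up to PCIE_MSIX,
 * and just above PCIE_MSIX up to the PCIE VDEC1 shared master interface), so the
 * PCIE_MSIX block itself is the only part of that span left outside the blocked
 * ranges.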
6175 */ 6176 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1, 6177 mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE); 6178 6179 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2, 6180 mmPCIE_MSIX_BASE + HL_BLOCK_SIZE, 6181 mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE); 6182 6183 WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1); 6184 return 0; 6185 } 6186 6187 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us) 6188 { 6189 int i, rc = 0; 6190 u32 reg_val; 6191 6192 /* We poll the BTM done indication multiple times after reset due to 6193 * a HW errata 'GAUDI2_0300' 6194 */ 6195 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++) 6196 rc = hl_poll_timeout( 6197 hdev, 6198 mmPSOC_GLOBAL_CONF_BTM_FSM, 6199 reg_val, 6200 reg_val == 0, 6201 1000, 6202 poll_timeout_us); 6203 6204 if (rc) 6205 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val); 6206 } 6207 6208 static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 6209 { 6210 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6211 u32 poll_timeout_us, reset_sleep_ms; 6212 bool driver_performs_reset = false; 6213 int rc; 6214 6215 if (hdev->pldm) { 6216 reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC : 6217 GAUDI2_PLDM_SRESET_TIMEOUT_MSEC; 6218 poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC; 6219 } else { 6220 reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC; 6221 poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC; 6222 } 6223 6224 if (fw_reset) 6225 goto skip_reset; 6226 6227 gaudi2_reset_arcs(hdev); 6228 6229 if (hard_reset) { 6230 driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw; 6231 gaudi2_execute_hard_reset(hdev); 6232 } else { 6233 /* 6234 * As we have to support also work with preboot only (which does not supports 6235 * soft reset) we have to make sure that security is disabled before letting driver 6236 * do the reset. user shall control the BFE flags to avoid asking soft reset in 6237 * secured device with preboot only. 6238 */ 6239 driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU && 6240 !hdev->asic_prop.fw_security_enabled); 6241 rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us); 6242 if (rc) 6243 return rc; 6244 } 6245 6246 skip_reset: 6247 if (driver_performs_reset || hard_reset) { 6248 /* 6249 * Instead of waiting for BTM indication we should wait for preboot ready: 6250 * Consider the below scenario: 6251 * 1. FW update is being triggered 6252 * - setting the dirty bit 6253 * 2. hard reset will be triggered due to the dirty bit 6254 * 3. FW initiates the reset: 6255 * - dirty bit cleared 6256 * - BTM indication cleared 6257 * - preboot ready indication cleared 6258 * 4. during hard reset: 6259 * - BTM indication will be set 6260 * - BIST test performed and another reset triggered 6261 * 5. only after this reset the preboot will set the preboot ready 6262 * 6263 * when polling on BTM indication alone we can lose sync with FW while trying to 6264 * communicate with FW that is during reset. 
6265 * to overcome this we will always wait to preboot ready indication 6266 */ 6267 6268 /* without this sleep reset will not work */ 6269 msleep(reset_sleep_ms); 6270 6271 if (hdev->fw_components & FW_TYPE_PREBOOT_CPU) 6272 hl_fw_wait_preboot_ready(hdev); 6273 else 6274 gaudi2_poll_btm_indication(hdev, poll_timeout_us); 6275 } 6276 6277 if (!gaudi2) 6278 return 0; 6279 6280 gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK); 6281 gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK); 6282 6283 /* 6284 * Clear NIC capability mask in order for driver to re-configure 6285 * NIC QMANs. NIC ports will not be re-configured during soft 6286 * reset as we call gaudi2_nic_init only during hard reset 6287 */ 6288 gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK); 6289 6290 if (hard_reset) { 6291 gaudi2->hw_cap_initialized &= 6292 ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK | 6293 HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q | 6294 HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK | 6295 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA | 6296 HW_CAP_MME_MASK | HW_CAP_ROT_MASK); 6297 6298 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat)); 6299 } else { 6300 gaudi2->hw_cap_initialized &= 6301 ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET | 6302 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK | 6303 HW_CAP_ROT_MASK); 6304 } 6305 return 0; 6306 } 6307 6308 static int gaudi2_suspend(struct hl_device *hdev) 6309 { 6310 int rc; 6311 6312 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 6313 if (rc) 6314 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 6315 6316 return rc; 6317 } 6318 6319 static int gaudi2_resume(struct hl_device *hdev) 6320 { 6321 return gaudi2_init_iatu(hdev); 6322 } 6323 6324 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 6325 void *cpu_addr, dma_addr_t dma_addr, size_t size) 6326 { 6327 int rc; 6328 6329 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 6330 VM_DONTCOPY | VM_NORESERVE); 6331 6332 #ifdef _HAS_DMA_MMAP_COHERENT 6333 6334 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); 6335 if (rc) 6336 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 6337 6338 #else 6339 6340 rc = remap_pfn_range(vma, vma->vm_start, 6341 virt_to_phys(cpu_addr) >> PAGE_SHIFT, 6342 size, vma->vm_page_prot); 6343 if (rc) 6344 dev_err(hdev->dev, "remap_pfn_range error %d", rc); 6345 6346 #endif 6347 6348 return rc; 6349 } 6350 6351 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id) 6352 { 6353 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6354 u64 hw_cap_mask = 0; 6355 u64 hw_tpc_cap_bit = 0; 6356 u64 hw_nic_cap_bit = 0; 6357 u64 hw_test_cap_bit = 0; 6358 6359 switch (hw_queue_id) { 6360 case GAUDI2_QUEUE_ID_PDMA_0_0: 6361 case GAUDI2_QUEUE_ID_PDMA_0_1: 6362 case GAUDI2_QUEUE_ID_PDMA_1_0: 6363 hw_cap_mask = HW_CAP_PDMA_MASK; 6364 break; 6365 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 6366 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 6367 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2); 6368 break; 6369 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 6370 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE + 6371 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2); 6372 break; 6373 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 6374 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE + 6375 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 
2); 6376 break; 6377 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 6378 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE + 6379 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2); 6380 break; 6381 6382 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 6383 hw_test_cap_bit = HW_CAP_MME_SHIFT; 6384 break; 6385 6386 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 6387 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1; 6388 break; 6389 6390 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 6391 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2; 6392 break; 6393 6394 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 6395 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3; 6396 break; 6397 6398 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3: 6399 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + 6400 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2); 6401 6402 /* special case where cap bit refers to the first queue id */ 6403 if (!hw_tpc_cap_bit) 6404 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0)); 6405 break; 6406 6407 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 6408 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE + 6409 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2); 6410 break; 6411 6412 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 6413 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) + 6414 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2); 6415 break; 6416 6417 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 6418 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) + 6419 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2); 6420 break; 6421 6422 case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 6423 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE); 6424 break; 6425 6426 case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3: 6427 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2); 6428 break; 6429 6430 case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3: 6431 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2); 6432 6433 /* special case where cap bit refers to the first queue id */ 6434 if (!hw_nic_cap_bit) 6435 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0)); 6436 break; 6437 6438 case GAUDI2_QUEUE_ID_CPU_PQ: 6439 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q); 6440 6441 default: 6442 return false; 6443 } 6444 6445 if (hw_tpc_cap_bit) 6446 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit)); 6447 6448 if (hw_nic_cap_bit) 6449 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit)); 6450 6451 if (hw_test_cap_bit) 6452 hw_cap_mask = BIT_ULL(hw_test_cap_bit); 6453 6454 return !!(gaudi2->hw_cap_initialized & hw_cap_mask); 6455 } 6456 6457 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id) 6458 { 6459 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6460 6461 switch (arc_id) { 6462 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC5: 6463 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 6464 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id)); 6465 6466 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 6467 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 6468 6469 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 6470 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 6471 6472 default: 6473 return false; 6474 } 6475 } 6476 6477 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id) 6478 { 6479 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6480 6481 switch (arc_id) { 6482 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 6483 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 6484 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id)); 6485 break; 6486 6487 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 6488 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 6489 break; 6490 6491 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 6492 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 6493 break; 6494 6495 default: 6496 return; 6497 } 6498 } 6499 6500 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id) 6501 { 6502 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6503 6504 switch (arc_id) { 6505 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 6506 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 6507 gaudi2->active_hw_arc |= BIT_ULL(arc_id); 6508 break; 6509 6510 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 6511 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0); 6512 break; 6513 6514 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 6515 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0); 6516 break; 6517 6518 default: 6519 return; 6520 } 6521 } 6522 6523 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 6524 { 6525 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 6526 u32 pq_offset, reg_base, db_reg_offset, db_value; 6527 6528 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) { 6529 /* 6530 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs. 6531 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ 6532 * number. 
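 * For example (illustrative only): the second queue of a QMAN has
 * (hw_queue_id & 0x3) == 1, so pq_offset = 1 * 4 = 4 and the doorbell is written
 * 4 bytes above QM_PQ_PI_0_OFFSET, i.e. to the second of the four successive
 * PQ_PI registers.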
6533 */ 6534 pq_offset = (hw_queue_id & 0x3) * 4; 6535 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 6536 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset; 6537 } else { 6538 db_reg_offset = mmCPU_IF_PF_PQ_PI; 6539 } 6540 6541 db_value = pi; 6542 6543 /* ring the doorbell */ 6544 WREG32(db_reg_offset, db_value); 6545 6546 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) { 6547 /* make sure device CPU will read latest data from host */ 6548 mb(); 6549 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq), 6550 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id); 6551 } 6552 } 6553 6554 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) 6555 { 6556 __le64 *pbd = (__le64 *) bd; 6557 6558 /* The QMANs are on the host memory so a simple copy suffice */ 6559 pqe[0] = pbd[0]; 6560 pqe[1] = pbd[1]; 6561 } 6562 6563 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size, 6564 dma_addr_t *dma_handle, gfp_t flags) 6565 { 6566 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags); 6567 } 6568 6569 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size, 6570 void *cpu_addr, dma_addr_t dma_handle) 6571 { 6572 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle); 6573 } 6574 6575 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, 6576 u32 timeout, u64 *result) 6577 { 6578 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6579 6580 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) { 6581 if (result) 6582 *result = 0; 6583 return 0; 6584 } 6585 6586 if (!timeout) 6587 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC; 6588 6589 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result); 6590 } 6591 6592 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size, 6593 gfp_t mem_flags, dma_addr_t *dma_handle) 6594 { 6595 if (size > GAUDI2_DMA_POOL_BLK_SIZE) 6596 return NULL; 6597 6598 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 6599 } 6600 6601 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr) 6602 { 6603 dma_pool_free(hdev->dma_pool, vaddr, dma_addr); 6604 } 6605 6606 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, 6607 dma_addr_t *dma_handle) 6608 { 6609 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 6610 } 6611 6612 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) 6613 { 6614 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 6615 } 6616 6617 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len, 6618 enum dma_data_direction dir) 6619 { 6620 dma_addr_t dma_addr; 6621 6622 dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir); 6623 if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr))) 6624 return 0; 6625 6626 return dma_addr; 6627 } 6628 6629 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len, 6630 enum dma_data_direction dir) 6631 { 6632 dma_unmap_single(&hdev->pdev->dev, addr, len, dir); 6633 } 6634 6635 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser) 6636 { 6637 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 6638 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6639 6640 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) { 6641 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 6642 return -EINVAL; 6643 } 6644 6645 
/* Just check if CB address is valid */ 6646 6647 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6648 parser->user_cb_size, 6649 asic_prop->sram_user_base_address, 6650 asic_prop->sram_end_address)) 6651 return 0; 6652 6653 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6654 parser->user_cb_size, 6655 asic_prop->dram_user_base_address, 6656 asic_prop->dram_end_address)) 6657 return 0; 6658 6659 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) && 6660 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6661 parser->user_cb_size, 6662 asic_prop->dmmu.start_addr, 6663 asic_prop->dmmu.end_addr)) 6664 return 0; 6665 6666 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) { 6667 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6668 parser->user_cb_size, 6669 asic_prop->pmmu.start_addr, 6670 asic_prop->pmmu.end_addr) || 6671 hl_mem_area_inside_range( 6672 (u64) (uintptr_t) parser->user_cb, 6673 parser->user_cb_size, 6674 asic_prop->pmmu_huge.start_addr, 6675 asic_prop->pmmu_huge.end_addr)) 6676 return 0; 6677 6678 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) { 6679 if (!hdev->pdev) 6680 return 0; 6681 6682 if (!device_iommu_mapped(&hdev->pdev->dev)) 6683 return 0; 6684 } 6685 6686 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n", 6687 parser->user_cb, parser->user_cb_size); 6688 6689 return -EFAULT; 6690 } 6691 6692 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 6693 { 6694 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6695 6696 if (!parser->is_kernel_allocated_cb) 6697 return gaudi2_validate_cb_address(hdev, parser); 6698 6699 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 6700 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n"); 6701 return -EINVAL; 6702 } 6703 6704 return 0; 6705 } 6706 6707 static int gaudi2_send_heartbeat(struct hl_device *hdev) 6708 { 6709 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6710 6711 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6712 return 0; 6713 6714 return hl_fw_send_heartbeat(hdev); 6715 } 6716 6717 /* This is an internal helper function, used to update the KDMA mmu props. 6718 * Should be called with a proper kdma lock. 
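 * The same ASID value is packed into both the read and write ASID fields, and the
 * MMU-bypass setting is likewise replicated for the read and write directions,
 * before being written to the KDMA AXUSER registers below.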
6719 */ 6720 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev, 6721 bool mmu_bypass, u32 asid) 6722 { 6723 u32 rw_asid, rw_mmu_bp; 6724 6725 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 6726 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 6727 6728 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) | 6729 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT); 6730 6731 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid); 6732 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp); 6733 } 6734 6735 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id, 6736 u32 mon_payload, u32 sync_value) 6737 { 6738 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm; 6739 u8 mask; 6740 6741 sob_offset = sob_id * 4; 6742 mon_offset = mon_id * 4; 6743 6744 /* Reset the SOB value */ 6745 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 6746 6747 /* Configure this address with CQ_ID 0 because CQ_EN is set */ 6748 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id); 6749 6750 /* Configure this address with CS index because CQ_EN is set */ 6751 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload); 6752 6753 sync_group_id = sob_id / 8; 6754 mask = ~(1 << (sob_id & 0x7)); 6755 mode = 1; /* comparison mode is "equal to" */ 6756 6757 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value); 6758 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode); 6759 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask); 6760 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id); 6761 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm); 6762 } 6763 6764 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */ 6765 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, 6766 u64 src_addr, u64 dst_addr, 6767 u32 size, bool is_memset) 6768 { 6769 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0; 6770 struct hl_cq_entry *cq_base; 6771 struct hl_cq *cq; 6772 u64 comp_addr; 6773 int rc; 6774 6775 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION, 6776 GAUDI2_RESERVED_MON_KDMA_COMPLETION, 6777 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1); 6778 6779 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + 6780 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32)); 6781 6782 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) | 6783 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1); 6784 6785 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr)); 6786 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr)); 6787 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr)); 6788 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr)); 6789 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr)); 6790 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr)); 6791 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val); 6792 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size); 6793 6794 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) | 6795 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1); 6796 6797 if (is_memset) 6798 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1); 6799 6800 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask); 6801 6802 /* Wait for completion */ 6803 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION]; 6804 cq_base = 
cq->kernel_address; 6805 polling_addr = (u32 *)&cq_base[cq->ci]; 6806 6807 if (hdev->pldm) 6808 /* for each 1MB 20 second of timeout */ 6809 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20; 6810 else 6811 timeout = KDMA_TIMEOUT_USEC; 6812 6813 /* Polling */ 6814 rc = hl_poll_timeout_memory( 6815 hdev, 6816 polling_addr, 6817 status, 6818 (status == 1), 6819 1000, 6820 timeout, 6821 true); 6822 6823 *polling_addr = 0; 6824 6825 if (rc) { 6826 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n"); 6827 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT); 6828 return rc; 6829 } 6830 6831 cq->ci = hl_cq_inc_ptr(cq->ci); 6832 6833 return 0; 6834 } 6835 6836 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val) 6837 { 6838 u32 i; 6839 6840 for (i = 0 ; i < size ; i += sizeof(u32)) 6841 WREG32(addr + i, val); 6842 } 6843 6844 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable) 6845 { 6846 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 6847 6848 if (enable) { 6849 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE); 6850 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0); 6851 } else { 6852 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED); 6853 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT); 6854 } 6855 } 6856 6857 static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id) 6858 { 6859 return hdev->asic_prop.first_available_user_sob[0] + 6860 hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0; 6861 } 6862 6863 static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id) 6864 { 6865 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4; 6866 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 6867 6868 /* Reset the SOB value */ 6869 WREG32(sob_addr, 0); 6870 } 6871 6872 static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val, 6873 struct gaudi2_queues_test_info *msg_info) 6874 { 6875 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4; 6876 u32 tmp, sob_base = 1; 6877 struct packet_msg_short *msg_short_pkt = msg_info->kern_addr; 6878 size_t pkt_size = sizeof(struct packet_msg_short); 6879 int rc; 6880 6881 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) | 6882 (1 << GAUDI2_PKT_CTL_EB_SHIFT) | 6883 (1 << GAUDI2_PKT_CTL_MB_SHIFT) | 6884 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) | 6885 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT); 6886 6887 msg_short_pkt->value = cpu_to_le32(sob_val); 6888 msg_short_pkt->ctl = cpu_to_le32(tmp); 6889 6890 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr); 6891 if (rc) 6892 dev_err(hdev->dev, 6893 "Failed to send msg_short packet to H/W queue %d\n", hw_queue_id); 6894 6895 return rc; 6896 } 6897 6898 static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val) 6899 { 6900 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4; 6901 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 6902 u32 timeout_usec, tmp; 6903 int rc; 6904 6905 if (hdev->pldm) 6906 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC; 6907 else 6908 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC; 6909 6910 rc = hl_poll_timeout( 6911 hdev, 6912 sob_addr, 6913 tmp, 6914 (tmp == sob_val), 6915 1000, 6916 timeout_usec); 6917 6918 if (rc == -ETIMEDOUT) { 6919 dev_err(hdev->dev, "H/W queue %d test failed 
(SOB_OBJ_0 == 0x%x)\n", 6920 hw_queue_id, tmp); 6921 rc = -EIO; 6922 } 6923 6924 return rc; 6925 } 6926 6927 static int gaudi2_test_cpu_queue(struct hl_device *hdev) 6928 { 6929 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6930 6931 /* 6932 * check capability here as send_cpu_message() won't update the result 6933 * value if no capability 6934 */ 6935 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6936 return 0; 6937 6938 return hl_fw_test_cpu_queue(hdev); 6939 } 6940 6941 static int gaudi2_test_queues(struct hl_device *hdev) 6942 { 6943 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6944 struct gaudi2_queues_test_info *msg_info; 6945 u32 sob_val = 0x5a5a; 6946 int i, rc; 6947 6948 /* send test message on all enabled Qs */ 6949 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) { 6950 if (!gaudi2_is_queue_enabled(hdev, i)) 6951 continue; 6952 6953 msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0]; 6954 gaudi2_qman_set_test_mode(hdev, i, true); 6955 gaudi2_test_queue_clear(hdev, i); 6956 rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info); 6957 if (rc) 6958 goto done; 6959 } 6960 6961 rc = gaudi2_test_cpu_queue(hdev); 6962 if (rc) 6963 goto done; 6964 6965 /* verify that all messages were processed */ 6966 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) { 6967 if (!gaudi2_is_queue_enabled(hdev, i)) 6968 continue; 6969 6970 rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val); 6971 if (rc) 6972 /* chip is not usable, no need for cleanups, just bail-out with error */ 6973 goto done; 6974 6975 gaudi2_test_queue_clear(hdev, i); 6976 gaudi2_qman_set_test_mode(hdev, i, false); 6977 } 6978 6979 done: 6980 return rc; 6981 } 6982 6983 static int gaudi2_compute_reset_late_init(struct hl_device *hdev) 6984 { 6985 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6986 size_t irq_arr_size; 6987 int rc; 6988 6989 gaudi2_init_arcs(hdev); 6990 6991 rc = gaudi2_scrub_arcs_dccm(hdev); 6992 if (rc) { 6993 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n"); 6994 return rc; 6995 } 6996 6997 gaudi2_init_security(hdev); 6998 6999 /* Unmask all IRQs since some could have been received during the soft reset */ 7000 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]); 7001 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size); 7002 } 7003 7004 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7005 struct engines_data *e) 7006 { 7007 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1; 7008 struct asic_fixed_properties *prop = &hdev->asic_prop; 7009 unsigned long *mask = (unsigned long *) mask_arr; 7010 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n"; 7011 bool is_idle = true, is_eng_idle; 7012 int engine_idx, i, j; 7013 u64 offset; 7014 7015 if (e) 7016 hl_engine_data_sprintf(e, 7017 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n" 7018 "---- ---- ------- ------------ ------------- -------------\n"); 7019 7020 for (i = 0; i < NUM_OF_DCORES; i++) { 7021 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) { 7022 int seq = i * NUM_OF_EDMA_PER_DCORE + j; 7023 7024 if (!(prop->edma_enabled_mask & BIT(seq))) 7025 continue; 7026 7027 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 + 7028 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 7029 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET; 7030 7031 dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset); 7032 dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset); 7033 7034 
qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset); 7035 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset); 7036 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset); 7037 7038 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 7039 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1); 7040 is_idle &= is_eng_idle; 7041 7042 if (mask && !is_eng_idle) 7043 set_bit(engine_idx, mask); 7044 7045 if (e) 7046 hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N", 7047 qm_glbl_sts0, dma_core_sts0, dma_core_sts1); 7048 } 7049 } 7050 7051 return is_idle; 7052 } 7053 7054 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7055 struct engines_data *e) 7056 { 7057 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1; 7058 unsigned long *mask = (unsigned long *) mask_arr; 7059 const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n"; 7060 bool is_idle = true, is_eng_idle; 7061 int engine_idx, i; 7062 u64 offset; 7063 7064 if (e) 7065 hl_engine_data_sprintf(e, 7066 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n" 7067 "---- ------- ------------ ------------- -------------\n"); 7068 7069 for (i = 0 ; i < NUM_OF_PDMA ; i++) { 7070 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i; 7071 offset = i * PDMA_OFFSET; 7072 dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset); 7073 dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset); 7074 7075 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset); 7076 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset); 7077 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset); 7078 7079 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 7080 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1); 7081 is_idle &= is_eng_idle; 7082 7083 if (mask && !is_eng_idle) 7084 set_bit(engine_idx, mask); 7085 7086 if (e) 7087 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N", 7088 qm_glbl_sts0, dma_core_sts0, dma_core_sts1); 7089 } 7090 7091 return is_idle; 7092 } 7093 7094 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7095 struct engines_data *e) 7096 { 7097 unsigned long *mask = (unsigned long *) mask_arr; 7098 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n"; 7099 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; 7100 bool is_idle = true, is_eng_idle; 7101 int engine_idx, i; 7102 u64 offset = 0; 7103 7104 /* NIC, twelve macros in Full chip */ 7105 if (e && hdev->nic_ports_mask) 7106 hl_engine_data_sprintf(e, 7107 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 7108 "--- ------- ------------ ----------\n"); 7109 7110 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 7111 if (!(i & 1)) 7112 offset = i / 2 * NIC_OFFSET; 7113 else 7114 offset += NIC_QM_OFFSET; 7115 7116 if (!(hdev->nic_ports_mask & BIT(i))) 7117 continue; 7118 7119 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i; 7120 7121 7122 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 7123 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset); 7124 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 7125 7126 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 7127 is_idle &= is_eng_idle; 7128 7129 if (mask && !is_eng_idle) 7130 set_bit(engine_idx, mask); 7131 7132 if (e) 7133 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? 
"Y" : "N", 7134 qm_glbl_sts0, qm_cgm_sts); 7135 } 7136 7137 return is_idle; 7138 } 7139 7140 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7141 struct engines_data *e) 7142 { 7143 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts; 7144 unsigned long *mask = (unsigned long *) mask_arr; 7145 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n"; 7146 bool is_idle = true, is_eng_idle; 7147 int engine_idx, i; 7148 u64 offset; 7149 7150 if (e) 7151 hl_engine_data_sprintf(e, 7152 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" 7153 "--- ---- ------- ------------ ---------------\n"); 7154 /* MME, one per Dcore */ 7155 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 7156 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET; 7157 offset = i * DCORE_OFFSET; 7158 7159 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset); 7160 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset); 7161 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset); 7162 7163 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 7164 is_idle &= is_eng_idle; 7165 7166 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset); 7167 is_eng_idle &= IS_MME_IDLE(mme_arch_sts); 7168 is_idle &= is_eng_idle; 7169 7170 if (e) 7171 hl_engine_data_sprintf(e, mme_fmt, i, "N", 7172 is_eng_idle ? "Y" : "N", 7173 qm_glbl_sts0, 7174 mme_arch_sts); 7175 7176 if (mask && !is_eng_idle) 7177 set_bit(engine_idx, mask); 7178 } 7179 7180 return is_idle; 7181 } 7182 7183 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset, 7184 struct iterate_module_ctx *ctx) 7185 { 7186 struct gaudi2_tpc_idle_data *idle_data = ctx->data; 7187 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; 7188 bool is_eng_idle; 7189 int engine_idx; 7190 7191 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1))) 7192 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7193 else 7194 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 + 7195 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst; 7196 7197 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset); 7198 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset); 7199 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset); 7200 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset); 7201 7202 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 7203 IS_TPC_IDLE(tpc_cfg_sts); 7204 *(idle_data->is_idle) &= is_eng_idle; 7205 7206 if (idle_data->mask && !is_eng_idle) 7207 set_bit(engine_idx, idle_data->mask); 7208 7209 if (idle_data->e) 7210 hl_engine_data_sprintf(idle_data->e, 7211 idle_data->tpc_fmt, dcore, inst, 7212 is_eng_idle ? 
"Y" : "N", 7213 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 7214 } 7215 7216 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7217 struct engines_data *e) 7218 { 7219 struct asic_fixed_properties *prop = &hdev->asic_prop; 7220 unsigned long *mask = (unsigned long *) mask_arr; 7221 bool is_idle = true; 7222 7223 struct gaudi2_tpc_idle_data tpc_idle_data = { 7224 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n", 7225 .e = e, 7226 .mask = mask, 7227 .is_idle = &is_idle, 7228 }; 7229 struct iterate_module_ctx tpc_iter = { 7230 .fn = &gaudi2_is_tpc_engine_idle, 7231 .data = &tpc_idle_data, 7232 }; 7233 7234 if (e && prop->tpc_enabled_mask) 7235 hl_engine_data_sprintf(e, 7236 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS STATUS\n" 7237 "---- --- ------- ------------ ---------- ------\n"); 7238 7239 gaudi2_iterate_tpcs(hdev, &tpc_iter); 7240 7241 return *tpc_idle_data.is_idle; 7242 } 7243 7244 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7245 struct engines_data *e) 7246 { 7247 struct asic_fixed_properties *prop = &hdev->asic_prop; 7248 unsigned long *mask = (unsigned long *) mask_arr; 7249 const char *pcie_dec_fmt = "%-10d%-9s%#x\n"; 7250 const char *dec_fmt = "%-6d%-5d%-9s%#x\n"; 7251 bool is_idle = true, is_eng_idle; 7252 u32 dec_swreg15, dec_enabled_bit; 7253 int engine_idx, i, j; 7254 u64 offset; 7255 7256 /* Decoders, two each Dcore and two shared PCIe decoders */ 7257 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) 7258 hl_engine_data_sprintf(e, 7259 "\nCORE DEC is_idle VSI_CMD_SWREG15\n" 7260 "---- --- ------- ---------------\n"); 7261 7262 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 7263 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) { 7264 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j); 7265 if (!(prop->decoder_enabled_mask & dec_enabled_bit)) 7266 continue; 7267 7268 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 + 7269 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 7270 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET; 7271 7272 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset); 7273 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 7274 is_idle &= is_eng_idle; 7275 7276 if (mask && !is_eng_idle) 7277 set_bit(engine_idx, mask); 7278 7279 if (e) 7280 hl_engine_data_sprintf(e, dec_fmt, i, j, 7281 is_eng_idle ? "Y" : "N", dec_swreg15); 7282 } 7283 } 7284 7285 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) 7286 hl_engine_data_sprintf(e, 7287 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n" 7288 "-------- ------- ---------------\n"); 7289 7290 /* Check shared(PCIe) decoders */ 7291 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) { 7292 dec_enabled_bit = PCIE_DEC_SHIFT + i; 7293 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit))) 7294 continue; 7295 7296 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i; 7297 offset = i * DCORE_DEC_OFFSET; 7298 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset); 7299 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 7300 is_idle &= is_eng_idle; 7301 7302 if (mask && !is_eng_idle) 7303 set_bit(engine_idx, mask); 7304 7305 if (e) 7306 hl_engine_data_sprintf(e, pcie_dec_fmt, i, 7307 is_eng_idle ? 
"Y" : "N", dec_swreg15); 7308 } 7309 7310 return is_idle; 7311 } 7312 7313 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7314 struct engines_data *e) 7315 { 7316 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n"; 7317 unsigned long *mask = (unsigned long *) mask_arr; 7318 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; 7319 bool is_idle = true, is_eng_idle; 7320 int engine_idx, i; 7321 u64 offset; 7322 7323 if (e) 7324 hl_engine_data_sprintf(e, 7325 "\nCORE ROT is_idle QM_GLBL_STS0 QM_GLBL_STS1 QM_CGM_STS\n" 7326 "---- --- ------- ------------ ------------ ----------\n"); 7327 7328 for (i = 0 ; i < NUM_OF_ROT ; i++) { 7329 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i; 7330 7331 offset = i * ROT_OFFSET; 7332 7333 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset); 7334 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset); 7335 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset); 7336 7337 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 7338 is_idle &= is_eng_idle; 7339 7340 if (mask && !is_eng_idle) 7341 set_bit(engine_idx, mask); 7342 7343 if (e) 7344 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N", 7345 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 7346 } 7347 7348 return is_idle; 7349 } 7350 7351 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7352 struct engines_data *e) 7353 { 7354 bool is_idle = true; 7355 7356 is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e); 7357 is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e); 7358 is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e); 7359 is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e); 7360 is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e); 7361 is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e); 7362 is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e); 7363 7364 return is_idle; 7365 } 7366 7367 static void gaudi2_hw_queues_lock(struct hl_device *hdev) 7368 __acquires(&gaudi2->hw_queues_lock) 7369 { 7370 struct gaudi2_device *gaudi2 = hdev->asic_specific; 7371 7372 spin_lock(&gaudi2->hw_queues_lock); 7373 } 7374 7375 static void gaudi2_hw_queues_unlock(struct hl_device *hdev) 7376 __releases(&gaudi2->hw_queues_lock) 7377 { 7378 struct gaudi2_device *gaudi2 = hdev->asic_specific; 7379 7380 spin_unlock(&gaudi2->hw_queues_lock); 7381 } 7382 7383 static u32 gaudi2_get_pci_id(struct hl_device *hdev) 7384 { 7385 return hdev->pdev->device; 7386 } 7387 7388 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) 7389 { 7390 struct gaudi2_device *gaudi2 = hdev->asic_specific; 7391 7392 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 7393 return 0; 7394 7395 return hl_fw_get_eeprom_data(hdev, data, max_size); 7396 } 7397 7398 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val) 7399 { 7400 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 7401 } 7402 7403 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7404 { 7405 struct gaudi2_device *gaudi2 = hdev->asic_specific; 7406 7407 if (aggregate) { 7408 *size = (u32) sizeof(gaudi2->events_stat_aggregate); 7409 return gaudi2->events_stat_aggregate; 7410 } 7411 7412 *size = (u32) sizeof(gaudi2->events_stat); 7413 return gaudi2->events_stat; 7414 } 7415 7416 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id, 7417 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 7418 { 7419 u32 offset = 
(mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) * 7420 dcore_vdec_id + DCORE_OFFSET * dcore_id; 7421 7422 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 7423 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 7424 7425 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 7426 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 7427 7428 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 7429 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 7430 7431 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 7432 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 7433 7434 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 7435 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 7436 } 7437 7438 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid) 7439 { 7440 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 7441 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 7442 struct asic_fixed_properties *prop = &hdev->asic_prop; 7443 u32 dcore_offset = dcore_id * DCORE_OFFSET; 7444 u32 vdec_id, i, ports_offset, reg_val; 7445 u8 edma_seq_base; 7446 7447 /* EDMA */ 7448 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE; 7449 if (prop->edma_enabled_mask & BIT(edma_seq_base)) { 7450 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 7451 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 7452 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 7453 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 7454 } 7455 7456 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) { 7457 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 7458 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 7459 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 7460 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 7461 } 7462 7463 /* Sync Mngr */ 7464 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid); 7465 /* 7466 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID 7467 * for any access type 7468 */ 7469 if (dcore_id > 0) { 7470 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) | 7471 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT); 7472 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val); 7473 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0); 7474 } 7475 7476 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0); 7477 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid); 7478 7479 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) { 7480 ports_offset = i * DCORE_MME_SBTE_OFFSET; 7481 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP + 7482 dcore_offset + ports_offset, 0); 7483 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID + 7484 dcore_offset + ports_offset, rw_asid); 7485 } 7486 7487 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) { 7488 ports_offset = i * DCORE_MME_WB_OFFSET; 7489 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP + 7490 dcore_offset + ports_offset, 0); 7491 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID + 7492 dcore_offset + ports_offset, rw_asid); 7493 } 7494 7495 
WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 7496 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 7497 7498 /* 7499 * Decoders 7500 */ 7501 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) { 7502 if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id)) 7503 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0); 7504 } 7505 } 7506 7507 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev, 7508 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 7509 { 7510 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id; 7511 7512 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 7513 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 7514 7515 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 7516 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 7517 7518 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 7519 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 7520 7521 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 7522 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 7523 7524 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 7525 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 7526 } 7527 7528 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id, 7529 u32 rw_asid, u32 rw_mmu_bp) 7530 { 7531 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id; 7532 7533 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp); 7534 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid); 7535 } 7536 7537 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid) 7538 { 7539 u32 reg_base, reg_offset, reg_val = 0; 7540 7541 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 7542 7543 /* Enable MMU and configure asid for all relevant ARC regions */ 7544 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0); 7545 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid); 7546 7547 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL); 7548 WREG32(reg_base + reg_offset, reg_val); 7549 7550 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW); 7551 WREG32(reg_base + reg_offset, reg_val); 7552 7553 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA); 7554 WREG32(reg_base + reg_offset, reg_val); 7555 7556 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA); 7557 WREG32(reg_base + reg_offset, reg_val); 7558 7559 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA); 7560 WREG32(reg_base + reg_offset, reg_val); 7561 7562 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE); 7563 WREG32(reg_base + reg_offset, reg_val); 7564 7565 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL); 7566 WREG32(reg_base + reg_offset, reg_val); 7567 7568 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL); 7569 WREG32(reg_base + reg_offset, reg_val); 7570 7571 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL); 7572 WREG32(reg_base + reg_offset, reg_val); 7573 7574 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL); 7575 WREG32(reg_base + reg_offset, reg_val); 7576 7577 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL); 7578 WREG32(reg_base + 
reg_offset, reg_val); 7579 } 7580 7581 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid) 7582 { 7583 int i; 7584 7585 if (hdev->fw_components & FW_TYPE_BOOT_CPU) 7586 return hl_fw_cpucp_engine_core_asid_set(hdev, asid); 7587 7588 for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++) 7589 gaudi2_arc_mmu_prepare(hdev, i, asid); 7590 7591 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 7592 if (!gaudi2_is_queue_enabled(hdev, i)) 7593 continue; 7594 7595 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid); 7596 } 7597 7598 return 0; 7599 } 7600 7601 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid) 7602 { 7603 struct asic_fixed_properties *prop = &hdev->asic_prop; 7604 u32 rw_asid, offset; 7605 int rc, i; 7606 7607 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) | 7608 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid); 7609 7610 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 7611 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 7612 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid); 7613 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0); 7614 7615 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 7616 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 7617 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid); 7618 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0); 7619 7620 /* ROT */ 7621 for (i = 0 ; i < NUM_OF_ROT ; i++) { 7622 offset = i * ROT_OFFSET; 7623 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid); 7624 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 7625 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK); 7626 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK); 7627 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK); 7628 } 7629 7630 /* Shared Decoders are the last bits in the decoders mask */ 7631 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0)) 7632 gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0); 7633 7634 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1)) 7635 gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0); 7636 7637 /* arc farm arc dup eng */ 7638 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++) 7639 gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0); 7640 7641 rc = gaudi2_arc_mmu_prepare_all(hdev, asid); 7642 if (rc) 7643 return rc; 7644 7645 return 0; 7646 } 7647 7648 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset, 7649 struct iterate_module_ctx *ctx) 7650 { 7651 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data; 7652 7653 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0); 7654 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid); 7655 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 7656 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid); 7657 } 7658 7659 /* zero the MMUBP and set the ASID */ 7660 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid) 7661 { 7662 struct gaudi2_device *gaudi2 = hdev->asic_specific; 7663 struct gaudi2_tpc_mmu_data tpc_mmu_data; 7664 struct iterate_module_ctx tpc_iter = { 7665 .fn = &gaudi2_tpc_mmu_prepare, 7666 .data = &tpc_mmu_data, 7667 }; 7668 int rc, i; 7669 7670 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) { 7671 dev_crit(hdev->dev, "asid %u is too big\n", asid); 7672 return -EINVAL; 7673 } 7674 7675 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK)) 7676 
return 0; 7677 7678 rc = gaudi2_mmu_shared_prepare(hdev, asid); 7679 if (rc) 7680 return rc; 7681 7682 /* configure DCORE MMUs */ 7683 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 7684 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 7685 gaudi2_iterate_tpcs(hdev, &tpc_iter); 7686 for (i = 0 ; i < NUM_OF_DCORES ; i++) 7687 gaudi2_mmu_dcore_prepare(hdev, i, asid); 7688 7689 return 0; 7690 } 7691 7692 static inline bool is_info_event(u32 event) 7693 { 7694 switch (event) { 7695 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: 7696 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 7697 7698 /* return in case of NIC status event - these events are received periodically and not as 7699 * an indication to an error. 7700 */ 7701 case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1: 7702 return true; 7703 default: 7704 return false; 7705 } 7706 } 7707 7708 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type, 7709 bool ratelimited, const char *fmt, ...) 7710 { 7711 struct va_format vaf; 7712 va_list args; 7713 7714 va_start(args, fmt); 7715 vaf.fmt = fmt; 7716 vaf.va = &args; 7717 7718 if (ratelimited) 7719 dev_err_ratelimited(hdev->dev, "%s: %pV\n", 7720 gaudi2_irq_map_table[event_type].valid ? 7721 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf); 7722 else 7723 dev_err(hdev->dev, "%s: %pV\n", 7724 gaudi2_irq_map_table[event_type].valid ? 7725 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf); 7726 7727 va_end(args); 7728 } 7729 7730 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7731 struct hl_eq_ecc_data *ecc_data) 7732 { 7733 u64 ecc_address = 0, ecc_syndrom = 0; 7734 u8 memory_wrapper_idx = 0; 7735 7736 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7737 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7738 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7739 7740 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, 7741 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. 
critical %u.", 7742 ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical); 7743 7744 return !!ecc_data->is_critical; 7745 } 7746 7747 /* 7748 * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 7749 * 7750 * @idx: the current pi/ci value 7751 * @q_len: the queue length (power of 2) 7752 * 7753 * @return the cyclically decremented index 7754 */ 7755 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len) 7756 { 7757 u32 mask = q_len - 1; 7758 7759 /* 7760 * modular decrement is equivalent to adding (queue_size -1) 7761 * later we take LSBs to make sure the value is in the 7762 * range [0, queue_len - 1] 7763 */ 7764 return (idx + q_len - 1) & mask; 7765 } 7766 7767 /** 7768 * gaudi2_print_sw_config_stream_data - print SW config stream data 7769 * 7770 * @hdev: pointer to the habanalabs device structure 7771 * @stream: the QMAN's stream 7772 * @qman_base: base address of QMAN registers block 7773 */ 7774 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev, 7775 u32 stream, u64 qman_base) 7776 { 7777 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 7778 u32 cq_ptr_lo_off, size; 7779 7780 cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0; 7781 7782 cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) + 7783 stream * cq_ptr_lo_off; 7784 7785 cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 7786 7787 cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 7788 7789 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 7790 size = RREG32(cq_tsize); 7791 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n", 7792 stream, cq_ptr, size); 7793 } 7794 7795 /** 7796 * gaudi2_print_last_pqes_on_err - print last PQEs on error 7797 * 7798 * @hdev: pointer to the habanalabs device structure 7799 * @qid_base: first QID of the QMAN (out of 4 streams) 7800 * @stream: the QMAN's stream 7801 * @qman_base: base address of QMAN registers block 7802 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 7803 */ 7804 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, 7805 u64 qman_base, bool pr_sw_conf) 7806 { 7807 u32 ci, qm_ci_stream_off; 7808 struct hl_hw_queue *q; 7809 u64 pq_ci; 7810 int i; 7811 7812 q = &hdev->kernel_queues[qid_base + stream]; 7813 7814 qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0; 7815 pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) + 7816 stream * qm_ci_stream_off; 7817 7818 hdev->asic_funcs->hw_queues_lock(hdev); 7819 7820 if (pr_sw_conf) 7821 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); 7822 7823 ci = RREG32(pq_ci); 7824 7825 /* we should start printing form ci -1 */ 7826 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 7827 7828 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 7829 struct hl_bd *bd; 7830 u64 addr; 7831 u32 len; 7832 7833 bd = q->kernel_address; 7834 bd += ci; 7835 7836 len = le32_to_cpu(bd->len); 7837 /* len 0 means uninitialized entry- break */ 7838 if (!len) 7839 break; 7840 7841 addr = le64_to_cpu(bd->ptr); 7842 7843 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n", 7844 stream, ci, addr, len); 7845 7846 /* get previous ci, wrap if needed */ 7847 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 7848 } 7849 7850 hdev->asic_funcs->hw_queues_unlock(hdev); 7851 } 7852 7853 /** 7854 * print_qman_data_on_err - extract QMAN data on 
error 7855 * 7856 * @hdev: pointer to the habanalabs device structure 7857 * @qid_base: first QID of the QMAN (out of 4 streams) 7858 * @stream: the QMAN's stream 7859 * @qman_base: base address of QMAN registers block 7860 * 7861 * This function attempt to extract as much data as possible on QMAN error. 7862 * On upper CP print the SW config stream data and last 8 PQEs. 7863 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 7864 */ 7865 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base) 7866 { 7867 u32 i; 7868 7869 if (stream != QMAN_STREAMS) { 7870 gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true); 7871 return; 7872 } 7873 7874 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); 7875 7876 for (i = 0 ; i < QMAN_STREAMS ; i++) 7877 gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false); 7878 } 7879 7880 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type, 7881 u64 qman_base, u32 qid_base) 7882 { 7883 u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0; 7884 u64 glbl_sts_addr, arb_err_addr; 7885 char reg_desc[32]; 7886 7887 glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE); 7888 arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE); 7889 7890 /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */ 7891 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 7892 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 7893 7894 if (!glbl_sts_val) 7895 continue; 7896 7897 if (i == QMAN_STREAMS) { 7898 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 7899 num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE; 7900 } else { 7901 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 7902 num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE; 7903 } 7904 7905 for (j = 0 ; j < num_error_causes ; j++) 7906 if (glbl_sts_val & BIT(j)) { 7907 gaudi2_print_event(hdev, event_type, true, 7908 "%s. err cause: %s", reg_desc, 7909 i == QMAN_STREAMS ? 7910 gaudi2_qman_lower_cp_error_cause[j] : 7911 gaudi2_qman_error_cause[j]); 7912 error_count++; 7913 } 7914 7915 print_qman_data_on_err(hdev, qid_base, i, qman_base); 7916 } 7917 7918 arb_err_val = RREG32(arb_err_addr); 7919 7920 if (!arb_err_val) 7921 goto out; 7922 7923 for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 7924 if (arb_err_val & BIT(j)) { 7925 gaudi2_print_event(hdev, event_type, true, 7926 "ARB_ERR. 
err cause: %s", 7927 gaudi2_qman_arb_error_cause[j]); 7928 error_count++; 7929 } 7930 } 7931 7932 out: 7933 return error_count; 7934 } 7935 7936 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, 7937 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7938 enum gaudi2_engine_id id, u64 *event_mask) 7939 { 7940 u32 razwi_hi, razwi_lo, razwi_xy; 7941 u16 eng_id = id; 7942 u8 rd_wr_flag; 7943 7944 if (is_write) { 7945 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI); 7946 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO); 7947 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY); 7948 rd_wr_flag = HL_RAZWI_WRITE; 7949 } else { 7950 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI); 7951 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO); 7952 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY); 7953 rd_wr_flag = HL_RAZWI_READ; 7954 } 7955 7956 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1, 7957 rd_wr_flag | HL_RAZWI_HBW, event_mask); 7958 7959 dev_err_ratelimited(hdev->dev, 7960 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n", 7961 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy); 7962 } 7963 7964 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, 7965 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7966 enum gaudi2_engine_id id, u64 *event_mask) 7967 { 7968 u64 razwi_addr = CFG_BASE; 7969 u32 razwi_xy; 7970 u16 eng_id = id; 7971 u8 rd_wr_flag; 7972 7973 if (is_write) { 7974 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI); 7975 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY); 7976 rd_wr_flag = HL_RAZWI_WRITE; 7977 } else { 7978 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI); 7979 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY); 7980 rd_wr_flag = HL_RAZWI_READ; 7981 } 7982 7983 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask); 7984 dev_err_ratelimited(hdev->dev, 7985 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n", 7986 name, is_write ? 
"WR" : "RD", rtr_mstr_if_base_addr, razwi_addr, 7987 razwi_xy); 7988 } 7989 7990 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev, 7991 enum razwi_event_sources module, u8 module_idx) 7992 { 7993 switch (module) { 7994 case RAZWI_TPC: 7995 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES)) 7996 return GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7997 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7998 (module_idx % NUM_OF_TPC_PER_DCORE) + 7999 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 8000 8001 case RAZWI_MME: 8002 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) + 8003 (module_idx * ENGINE_ID_DCORE_OFFSET)); 8004 8005 case RAZWI_EDMA: 8006 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 8007 (module_idx % NUM_OF_EDMA_PER_DCORE)); 8008 8009 case RAZWI_PDMA: 8010 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx); 8011 8012 case RAZWI_NIC: 8013 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx)); 8014 8015 case RAZWI_DEC: 8016 if (module_idx == 8) 8017 return GAUDI2_PCIE_ENGINE_ID_DEC_0; 8018 8019 if (module_idx == 9) 8020 return GAUDI2_PCIE_ENGINE_ID_DEC_1; 8021 ; 8022 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 8023 (module_idx % NUM_OF_DEC_PER_DCORE) + 8024 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 8025 8026 case RAZWI_ROT: 8027 return GAUDI2_ENGINE_ID_ROT_0 + module_idx; 8028 8029 default: 8030 return GAUDI2_ENGINE_ID_SIZE; 8031 } 8032 } 8033 8034 /* 8035 * This function handles RR(Range register) hit events. 8036 * raised be initiators not PSOC RAZWI. 8037 */ 8038 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, 8039 enum razwi_event_sources module, u8 module_idx, 8040 u8 module_sub_idx, u64 *event_mask) 8041 { 8042 bool via_sft = false; 8043 u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id; 8044 u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr; 8045 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0; 8046 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0; 8047 char initiator_name[64]; 8048 8049 switch (module) { 8050 case RAZWI_TPC: 8051 hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx]; 8052 8053 if (hl_is_fw_sw_ver_below(hdev, 1, 9) && 8054 !hdev->asic_prop.fw_security_enabled && 8055 ((module_idx == 0) || (module_idx == 1))) 8056 lbw_rtr_id = DCORE0_RTR0; 8057 else 8058 lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx]; 8059 sprintf(initiator_name, "TPC_%u", module_idx); 8060 break; 8061 case RAZWI_MME: 8062 sprintf(initiator_name, "MME_%u", module_idx); 8063 switch (module_sub_idx) { 8064 case MME_WAP0: 8065 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0; 8066 break; 8067 case MME_WAP1: 8068 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1; 8069 break; 8070 case MME_WRITE: 8071 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write; 8072 break; 8073 case MME_READ: 8074 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read; 8075 break; 8076 case MME_SBTE0: 8077 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0; 8078 break; 8079 case MME_SBTE1: 8080 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1; 8081 break; 8082 case MME_SBTE2: 8083 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2; 8084 break; 8085 case MME_SBTE3: 8086 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3; 8087 break; 8088 case MME_SBTE4: 8089 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4; 8090 break; 8091 default: 
8092 return; 8093 } 8094 lbw_rtr_id = hbw_rtr_id; 8095 break; 8096 case RAZWI_EDMA: 8097 hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx]; 8098 dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE; 8099 /* SFT has separate MSTR_IF for LBW, only there we can 8100 * read the LBW razwi related registers 8101 */ 8102 lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE + 8103 dcore_id * SFT_DCORE_OFFSET; 8104 via_sft = true; 8105 sprintf(initiator_name, "EDMA_%u", module_idx); 8106 break; 8107 case RAZWI_PDMA: 8108 hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx]; 8109 lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx]; 8110 sprintf(initiator_name, "PDMA_%u", module_idx); 8111 break; 8112 case RAZWI_NIC: 8113 hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx]; 8114 lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx]; 8115 sprintf(initiator_name, "NIC_%u", module_idx); 8116 break; 8117 case RAZWI_DEC: 8118 hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx]; 8119 lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx]; 8120 sprintf(initiator_name, "DEC_%u", module_idx); 8121 break; 8122 case RAZWI_ROT: 8123 hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx]; 8124 lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx]; 8125 sprintf(initiator_name, "ROT_%u", module_idx); 8126 break; 8127 default: 8128 return; 8129 } 8130 8131 /* Find router mstr_if register base */ 8132 if (!via_sft) { 8133 dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE; 8134 dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE; 8135 hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE + 8136 dcore_id * DCORE_OFFSET + 8137 dcore_rtr_id * DCORE_RTR_OFFSET + 8138 RTR_MSTR_IF_OFFSET; 8139 lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr + 8140 (((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET); 8141 } 8142 8143 /* Find out event cause by reading "RAZWI_HAPPENED" registers */ 8144 hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED); 8145 hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED); 8146 lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED); 8147 lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED); 8148 8149 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx); 8150 if (hbw_shrd_aw) { 8151 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true, 8152 initiator_name, eng_id, event_mask); 8153 8154 /* Clear event indication */ 8155 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw); 8156 } 8157 8158 if (hbw_shrd_ar) { 8159 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false, 8160 initiator_name, eng_id, event_mask); 8161 8162 /* Clear event indication */ 8163 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar); 8164 } 8165 8166 if (lbw_shrd_aw) { 8167 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true, 8168 initiator_name, eng_id, event_mask); 8169 8170 /* Clear event indication */ 8171 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw); 8172 } 8173 8174 if (lbw_shrd_ar) { 8175 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false, 8176 initiator_name, eng_id, event_mask); 8177 8178 /* Clear event indication */ 8179 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar); 8180 } 8181 } 8182 8183 static void 
gaudi2_check_if_razwi_happened(struct hl_device *hdev) 8184 { 8185 struct asic_fixed_properties *prop = &hdev->asic_prop; 8186 u8 mod_idx, sub_mod; 8187 8188 /* check all TPCs */ 8189 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) { 8190 if (prop->tpc_enabled_mask & BIT(mod_idx)) 8191 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL); 8192 } 8193 8194 /* check all MMEs */ 8195 for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 8196 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++) 8197 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx, 8198 sub_mod, NULL); 8199 8200 /* check all EDMAs */ 8201 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 8202 if (prop->edma_enabled_mask & BIT(mod_idx)) 8203 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL); 8204 8205 /* check all PDMAs */ 8206 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++) 8207 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL); 8208 8209 /* check all NICs */ 8210 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++) 8211 if (hdev->nic_ports_mask & BIT(mod_idx)) 8212 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0, 8213 NULL); 8214 8215 /* check all DECs */ 8216 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++) 8217 if (prop->decoder_enabled_mask & BIT(mod_idx)) 8218 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL); 8219 8220 /* check all ROTs */ 8221 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++) 8222 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL); 8223 } 8224 8225 static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size, 8226 u32 axuser_xy, u32 *base, u16 *eng_id, 8227 char *eng_name) 8228 { 8229 8230 int i, num_of_eng = 0; 8231 u16 str_size = 0; 8232 8233 for (i = 0 ; i < array_size ; i++) { 8234 if (axuser_xy != razwi_info[i].axuser_xy) 8235 continue; 8236 8237 eng_id[num_of_eng] = razwi_info[i].eng_id; 8238 base[num_of_eng] = razwi_info[i].rtr_ctrl; 8239 if (!num_of_eng) 8240 str_size += snprintf(eng_name + str_size, 8241 PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s", 8242 razwi_info[i].eng_name); 8243 else 8244 str_size += snprintf(eng_name + str_size, 8245 PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s", 8246 razwi_info[i].eng_name); 8247 num_of_eng++; 8248 } 8249 8250 return num_of_eng; 8251 } 8252 8253 static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg, 8254 u64 *event_mask) 8255 { 8256 u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0; 8257 u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR]; 8258 u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR]; 8259 char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE]; 8260 bool razwi_happened = false; 8261 u64 addr; 8262 int i; 8263 8264 num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info), 8265 axuser_xy, base, eng_id, eng_name_str); 8266 8267 /* If no match for XY coordinates, try to find it in MME razwi table */ 8268 if (!num_of_eng) { 8269 axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg); 8270 num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info, 8271 ARRAY_SIZE(mme_razwi_info), 8272 axuser_xy, base, eng_id, 8273 eng_name_str); 8274 } 8275 8276 for (i = 0 ; i < num_of_eng ; i++) { 8277 if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) { 8278 addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI); 
8279             addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
8280             addr = ((u64)addr_hi << 32) + addr_lo;
8281             if (addr) {
8282                 dev_err(hdev->dev,
8283                     "PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8284                     eng_name_str, addr);
8285                 hl_handle_razwi(hdev, addr, &eng_id[0],
8286                     num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
8287                 razwi_happened = true;
8288             }
8289         }
8290 
8291         if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
8292             addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
8293             addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
8294             addr = ((u64)addr_hi << 32) + addr_lo;
8295             if (addr) {
8296                 dev_err(hdev->dev,
8297                     "PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8298                     eng_name_str, addr);
8299                 hl_handle_razwi(hdev, addr, &eng_id[0],
8300                     num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
8301                 razwi_happened = true;
8302             }
8303         }
8304 
8305         if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
8306             addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
8307             if (addr_lo) {
8308                 dev_err(hdev->dev,
8309                     "PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8310                     eng_name_str, addr_lo);
8311                 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8312                     num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
8313                 razwi_happened = true;
8314             }
8315         }
8316 
8317         if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
8318             addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
8319             if (addr_lo) {
8320                 dev_err(hdev->dev,
8321                     "PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8322                     eng_name_str, addr_lo);
8323                 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8324                     num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
8325                 razwi_happened = true;
8326             }
8327         }
8328         /* In the common case the loop breaks here, since there is only one engine id
8329          * or several engines sharing the same router. The exception is a PSOC RAZWI
8330          * from EDMA, where the AXUSER id can match 2 routers (the 2 interfaces of the
8331          * SFT router). In that case the first router may not hold the captured info
8332          * and we need to iterate over the other router.
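         * Note (added): razwi_happened is only set once a captured address is
         * actually found, so breaking on it stops at the router that latched
         * the offending transaction.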
8333 */ 8334 if (razwi_happened) 8335 break; 8336 } 8337 8338 return razwi_happened; 8339 } 8340 8341 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */ 8342 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask) 8343 { 8344 u32 razwi_mask_info, razwi_intr = 0, error_count = 0; 8345 8346 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) { 8347 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT); 8348 if (!razwi_intr) 8349 return 0; 8350 } 8351 8352 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO); 8353 8354 dev_err_ratelimited(hdev->dev, 8355 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", 8356 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info), 8357 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info), 8358 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info), 8359 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info), 8360 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info)); 8361 8362 if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask)) 8363 error_count++; 8364 else 8365 dev_err_ratelimited(hdev->dev, 8366 "PSOC RAZWI interrupt: invalid razwi info (0x%x)\n", 8367 razwi_mask_info); 8368 8369 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */ 8370 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) 8371 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr); 8372 8373 return error_count; 8374 } 8375 8376 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type) 8377 { 8378 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 8379 8380 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET); 8381 8382 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) { 8383 if (sts_val & BIT(i)) { 8384 gaudi2_print_event(hdev, event_type, true, 8385 "err cause: %s", gaudi2_qm_sei_error_cause[i]); 8386 sts_clr_val |= BIT(i); 8387 error_count++; 8388 } 8389 } 8390 8391 WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val); 8392 8393 return error_count; 8394 } 8395 8396 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, 8397 bool extended_err_check, u64 *event_mask) 8398 { 8399 enum razwi_event_sources module; 8400 u32 error_count = 0; 8401 u64 qman_base; 8402 u8 index; 8403 8404 switch (event_type) { 8405 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... 
GAUDI2_EVENT_TPC23_AXI_ERR_RSP: 8406 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 8407 qman_base = mmDCORE0_TPC0_QM_BASE + 8408 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET + 8409 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET; 8410 module = RAZWI_TPC; 8411 break; 8412 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 8413 qman_base = mmDCORE0_TPC6_QM_BASE; 8414 module = RAZWI_TPC; 8415 break; 8416 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: 8417 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: 8418 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: 8419 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: 8420 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) / 8421 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - 8422 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); 8423 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET; 8424 module = RAZWI_MME; 8425 break; 8426 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 8427 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 8428 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP; 8429 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET; 8430 module = RAZWI_PDMA; 8431 break; 8432 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 8433 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 8434 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 8435 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET; 8436 module = RAZWI_ROT; 8437 break; 8438 default: 8439 return 0; 8440 } 8441 8442 error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 8443 8444 /* There is a single event per NIC macro, so should check its both QMAN blocks */ 8445 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE && 8446 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE) 8447 error_count += _gaudi2_handle_qm_sei_err(hdev, 8448 qman_base + NIC_QM_OFFSET, event_type); 8449 8450 if (extended_err_check) { 8451 /* check if RAZWI happened */ 8452 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask); 8453 hl_check_for_glbl_errors(hdev); 8454 } 8455 8456 return error_count; 8457 } 8458 8459 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 8460 { 8461 u32 qid_base, error_count = 0; 8462 u64 qman_base; 8463 u8 index = 0; 8464 8465 switch (event_type) { 8466 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM: 8467 index = event_type - GAUDI2_EVENT_TPC0_QM; 8468 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS; 8469 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 8470 break; 8471 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM: 8472 index = event_type - GAUDI2_EVENT_TPC6_QM; 8473 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS; 8474 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 8475 break; 8476 case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM: 8477 index = event_type - GAUDI2_EVENT_TPC12_QM; 8478 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS; 8479 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 8480 break; 8481 case GAUDI2_EVENT_TPC18_QM ... 
GAUDI2_EVENT_TPC23_QM: 8482 index = event_type - GAUDI2_EVENT_TPC18_QM; 8483 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS; 8484 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 8485 break; 8486 case GAUDI2_EVENT_TPC24_QM: 8487 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 8488 qman_base = mmDCORE0_TPC6_QM_BASE; 8489 break; 8490 case GAUDI2_EVENT_MME0_QM: 8491 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 8492 qman_base = mmDCORE0_MME_QM_BASE; 8493 break; 8494 case GAUDI2_EVENT_MME1_QM: 8495 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 8496 qman_base = mmDCORE1_MME_QM_BASE; 8497 break; 8498 case GAUDI2_EVENT_MME2_QM: 8499 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 8500 qman_base = mmDCORE2_MME_QM_BASE; 8501 break; 8502 case GAUDI2_EVENT_MME3_QM: 8503 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 8504 qman_base = mmDCORE3_MME_QM_BASE; 8505 break; 8506 case GAUDI2_EVENT_HDMA0_QM: 8507 index = 0; 8508 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0; 8509 qman_base = mmDCORE0_EDMA0_QM_BASE; 8510 break; 8511 case GAUDI2_EVENT_HDMA1_QM: 8512 index = 1; 8513 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0; 8514 qman_base = mmDCORE0_EDMA1_QM_BASE; 8515 break; 8516 case GAUDI2_EVENT_HDMA2_QM: 8517 index = 2; 8518 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0; 8519 qman_base = mmDCORE1_EDMA0_QM_BASE; 8520 break; 8521 case GAUDI2_EVENT_HDMA3_QM: 8522 index = 3; 8523 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0; 8524 qman_base = mmDCORE1_EDMA1_QM_BASE; 8525 break; 8526 case GAUDI2_EVENT_HDMA4_QM: 8527 index = 4; 8528 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0; 8529 qman_base = mmDCORE2_EDMA0_QM_BASE; 8530 break; 8531 case GAUDI2_EVENT_HDMA5_QM: 8532 index = 5; 8533 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0; 8534 qman_base = mmDCORE2_EDMA1_QM_BASE; 8535 break; 8536 case GAUDI2_EVENT_HDMA6_QM: 8537 index = 6; 8538 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0; 8539 qman_base = mmDCORE3_EDMA0_QM_BASE; 8540 break; 8541 case GAUDI2_EVENT_HDMA7_QM: 8542 index = 7; 8543 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0; 8544 qman_base = mmDCORE3_EDMA1_QM_BASE; 8545 break; 8546 case GAUDI2_EVENT_PDMA0_QM: 8547 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0; 8548 qman_base = mmPDMA0_QM_BASE; 8549 break; 8550 case GAUDI2_EVENT_PDMA1_QM: 8551 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0; 8552 qman_base = mmPDMA1_QM_BASE; 8553 break; 8554 case GAUDI2_EVENT_ROTATOR0_ROT0_QM: 8555 qid_base = GAUDI2_QUEUE_ID_ROT_0_0; 8556 qman_base = mmROT0_QM_BASE; 8557 break; 8558 case GAUDI2_EVENT_ROTATOR1_ROT1_QM: 8559 qid_base = GAUDI2_QUEUE_ID_ROT_1_0; 8560 qman_base = mmROT1_QM_BASE; 8561 break; 8562 default: 8563 return 0; 8564 } 8565 8566 error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base); 8567 8568 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */ 8569 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) { 8570 error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 8571 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask); 8572 } 8573 8574 hl_check_for_glbl_errors(hdev); 8575 8576 return error_count; 8577 } 8578 8579 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type) 8580 { 8581 u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm; 8582 8583 for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) { 8584 sts_clr_val = 0; 8585 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS + 8586 (arc_farm * ARC_FARM_OFFSET)); 8587 8588 for (i = 0 ; i < 
GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) { 8589 if (sts_val & BIT(i)) { 8590 gaudi2_print_event(hdev, event_type, true, 8591 "ARC FARM ARC %u err cause: %s", 8592 arc_farm, gaudi2_arc_sei_error_cause[i]); 8593 sts_clr_val |= BIT(i); 8594 error_count++; 8595 } 8596 } 8597 WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET), 8598 sts_clr_val); 8599 } 8600 8601 hl_check_for_glbl_errors(hdev); 8602 8603 return error_count; 8604 } 8605 8606 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type) 8607 { 8608 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 8609 8610 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS); 8611 8612 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) { 8613 if (sts_val & BIT(i)) { 8614 gaudi2_print_event(hdev, event_type, true, 8615 "err cause: %s", gaudi2_cpu_sei_error_cause[i]); 8616 sts_clr_val |= BIT(i); 8617 error_count++; 8618 } 8619 } 8620 8621 hl_check_for_glbl_errors(hdev); 8622 8623 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val); 8624 8625 return error_count; 8626 } 8627 8628 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type, 8629 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 8630 u64 *event_mask) 8631 { 8632 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 8633 u32 error_count = 0; 8634 int i; 8635 8636 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++) 8637 if (intr_cause_data & BIT(i)) { 8638 gaudi2_print_event(hdev, event_type, true, 8639 "err cause: %s", guadi2_rot_error_cause[i]); 8640 error_count++; 8641 } 8642 8643 /* check if RAZWI happened */ 8644 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask); 8645 hl_check_for_glbl_errors(hdev); 8646 8647 return error_count; 8648 } 8649 8650 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type, 8651 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 8652 u64 *event_mask) 8653 { 8654 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 8655 u32 error_count = 0; 8656 int i; 8657 8658 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++) 8659 if (intr_cause_data & BIT(i)) { 8660 gaudi2_print_event(hdev, event_type, true, 8661 "interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]); 8662 error_count++; 8663 } 8664 8665 /* check if RAZWI happened */ 8666 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask); 8667 hl_check_for_glbl_errors(hdev); 8668 8669 return error_count; 8670 } 8671 8672 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type, 8673 u64 *event_mask) 8674 { 8675 u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0; 8676 int i; 8677 8678 if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES) 8679 /* DCORE DEC */ 8680 sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR + 8681 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) + 8682 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE); 8683 else 8684 /* PCIE DEC */ 8685 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET * 8686 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES); 8687 8688 sts_val = RREG32(sts_addr); 8689 8690 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) { 8691 if (sts_val & BIT(i)) { 8692 gaudi2_print_event(hdev, event_type, true, 8693 "err cause: %s", gaudi2_dec_error_cause[i]); 8694 sts_clr_val |= BIT(i); 8695 error_count++; 8696 } 8697 } 8698 8699 /* check if RAZWI happened */ 8700 gaudi2_ack_module_razwi_event_handler(hdev, 
RAZWI_DEC, dec_index, 0, event_mask); 8701 hl_check_for_glbl_errors(hdev); 8702 8703 /* Write 1 clear errors */ 8704 WREG32(sts_addr, sts_clr_val); 8705 8706 return error_count; 8707 } 8708 8709 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type, 8710 u64 *event_mask) 8711 { 8712 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0; 8713 int i; 8714 8715 sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index; 8716 sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index; 8717 8718 sts_val = RREG32(sts_addr); 8719 8720 for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) { 8721 if (sts_val & BIT(i)) { 8722 gaudi2_print_event(hdev, event_type, true, 8723 "err cause: %s", guadi2_mme_error_cause[i]); 8724 sts_clr_val |= BIT(i); 8725 error_count++; 8726 } 8727 } 8728 8729 /* check if RAZWI happened */ 8730 for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++) 8731 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask); 8732 8733 hl_check_for_glbl_errors(hdev); 8734 8735 WREG32(sts_clr_addr, sts_clr_val); 8736 8737 return error_count; 8738 } 8739 8740 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type, 8741 u64 intr_cause_data) 8742 { 8743 int i, error_count = 0; 8744 8745 for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++) 8746 if (intr_cause_data & BIT(i)) { 8747 gaudi2_print_event(hdev, event_type, true, 8748 "err cause: %s", guadi2_mme_sbte_error_cause[i]); 8749 error_count++; 8750 } 8751 8752 hl_check_for_glbl_errors(hdev); 8753 8754 return error_count; 8755 } 8756 8757 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type, 8758 u64 *event_mask) 8759 { 8760 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0; 8761 int i; 8762 8763 sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index; 8764 sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index; 8765 8766 sts_val = RREG32(sts_addr); 8767 8768 for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) { 8769 if (sts_val & BIT(i)) { 8770 gaudi2_print_event(hdev, event_type, true, 8771 "err cause: %s", guadi2_mme_wap_error_cause[i]); 8772 sts_clr_val |= BIT(i); 8773 error_count++; 8774 } 8775 } 8776 8777 /* check if RAZWI happened on WAP0/1 */ 8778 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask); 8779 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask); 8780 hl_check_for_glbl_errors(hdev); 8781 8782 WREG32(sts_clr_addr, sts_clr_val); 8783 8784 return error_count; 8785 } 8786 8787 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type, 8788 u64 intr_cause_data) 8789 { 8790 u32 error_count = 0; 8791 int i; 8792 8793 /* If an AXI read or write error is received, an error is reported and 8794 * interrupt message is sent. Due to an HW errata, when reading the cause 8795 * register of the KDMA engine, the reported error is always HBW even if 8796 * the actual error caused by a LBW KDMA transaction. 
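     * As a result (added note), the cause bits below cannot be used to tell
     * HBW from LBW failures - the raised cause strings are reported as-is.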
8797 */ 8798 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) 8799 if (intr_cause_data & BIT(i)) { 8800 gaudi2_print_event(hdev, event_type, true, 8801 "err cause: %s", gaudi2_kdma_core_interrupts_cause[i]); 8802 error_count++; 8803 } 8804 8805 hl_check_for_glbl_errors(hdev); 8806 8807 return error_count; 8808 } 8809 8810 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, u64 intr_cause) 8811 { 8812 u32 error_count = 0; 8813 int i; 8814 8815 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) 8816 if (intr_cause & BIT(i)) { 8817 gaudi2_print_event(hdev, event_type, true, 8818 "err cause: %s", gaudi2_dma_core_interrupts_cause[i]); 8819 error_count++; 8820 } 8821 8822 hl_check_for_glbl_errors(hdev); 8823 8824 return error_count; 8825 } 8826 8827 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask) 8828 { 8829 u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr; 8830 8831 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED; 8832 if (RREG32(razwi_happened_addr)) { 8833 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", 8834 GAUDI2_ENGINE_ID_PCIE, event_mask); 8835 WREG32(razwi_happened_addr, 0x1); 8836 } 8837 8838 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED; 8839 if (RREG32(razwi_happened_addr)) { 8840 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", 8841 GAUDI2_ENGINE_ID_PCIE, event_mask); 8842 WREG32(razwi_happened_addr, 0x1); 8843 } 8844 8845 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED; 8846 if (RREG32(razwi_happened_addr)) { 8847 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", 8848 GAUDI2_ENGINE_ID_PCIE, event_mask); 8849 WREG32(razwi_happened_addr, 0x1); 8850 } 8851 8852 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED; 8853 if (RREG32(razwi_happened_addr)) { 8854 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", 8855 GAUDI2_ENGINE_ID_PCIE, event_mask); 8856 WREG32(razwi_happened_addr, 0x1); 8857 } 8858 } 8859 8860 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type, 8861 u64 intr_cause_data, u64 *event_mask) 8862 { 8863 u32 error_count = 0; 8864 int i; 8865 8866 for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) { 8867 if (!(intr_cause_data & BIT_ULL(i))) 8868 continue; 8869 8870 gaudi2_print_event(hdev, event_type, true, 8871 "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]); 8872 error_count++; 8873 8874 /* 8875 * Always check for LBW and HBW additional info as the indication itself is 8876 * sometimes missing 8877 */ 8878 hl_check_for_glbl_errors(hdev); 8879 gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); 8880 } 8881 8882 return error_count; 8883 } 8884 8885 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type, 8886 u64 intr_cause_data) 8887 8888 { 8889 u32 error_count = 0; 8890 int i; 8891 8892 for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) { 8893 if (intr_cause_data & BIT_ULL(i)) { 8894 gaudi2_print_event(hdev, event_type, true, 8895 "err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]); 8896 error_count++; 8897 } 8898 } 8899 8900 return error_count; 8901 } 8902 8903 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data) 8904 { 8905 u32 error_count = 0; 8906 int i; 8907 8908 for (i = 0 ; i < 
GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) { 8909 if (intr_cause_data & BIT_ULL(i)) { 8910 gaudi2_print_event(hdev, event_type, true, 8911 "err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]); 8912 error_count++; 8913 } 8914 } 8915 8916 return error_count; 8917 } 8918 8919 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu, 8920 u64 *event_mask) 8921 { 8922 u32 valid, val; 8923 u64 addr; 8924 8925 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8926 8927 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK)) 8928 return; 8929 8930 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE)); 8931 addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK; 8932 addr <<= 32; 8933 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA)); 8934 8935 if (!is_pmmu) 8936 addr = gaudi2_mmu_descramble_addr(hdev, addr); 8937 8938 dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n", 8939 is_pmmu ? "PMMU" : "HMMU", addr); 8940 hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask); 8941 8942 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0); 8943 } 8944 8945 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu) 8946 { 8947 u32 valid, val; 8948 u64 addr; 8949 8950 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8951 8952 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK)) 8953 return; 8954 8955 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE)); 8956 addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK; 8957 addr <<= 32; 8958 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA)); 8959 8960 if (!is_pmmu) 8961 addr = gaudi2_mmu_descramble_addr(hdev, addr); 8962 8963 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n", 8964 is_pmmu ? 
"PMMU" : "HMMU", addr); 8965 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0); 8966 } 8967 8968 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type, 8969 u64 mmu_base, bool is_pmmu, u64 *event_mask) 8970 { 8971 u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0; 8972 int i; 8973 8974 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET); 8975 8976 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) { 8977 if (spi_sei_cause & BIT(i)) { 8978 gaudi2_print_event(hdev, event_type, true, 8979 "err cause: %s", gaudi2_mmu_spi_sei[i].cause); 8980 8981 if (i == 0) 8982 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask); 8983 else if (i == 1) 8984 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu); 8985 8986 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0) 8987 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit); 8988 8989 error_count++; 8990 } 8991 } 8992 8993 /* Clear cause */ 8994 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause); 8995 8996 /* Clear interrupt */ 8997 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr); 8998 8999 return error_count; 9000 } 9001 9002 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index) 9003 { 9004 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log, 9005 cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0; 9006 int i; 9007 9008 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index; 9009 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index; 9010 9011 sei_cause_val = RREG32(sei_cause_addr); 9012 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val); 9013 cq_intr_val = RREG32(cq_intr_addr); 9014 9015 /* SEI interrupt */ 9016 if (sei_cause_cause) { 9017 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */ 9018 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK, 9019 sei_cause_val); 9020 9021 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) { 9022 if (!(sei_cause_cause & BIT(i))) 9023 continue; 9024 9025 gaudi2_print_event(hdev, event_type, true, 9026 "err cause: %s. %s: 0x%X", 9027 gaudi2_sm_sei_cause[i].cause_name, 9028 gaudi2_sm_sei_cause[i].log_name, 9029 sei_cause_log); 9030 error_count++; 9031 break; 9032 } 9033 9034 /* Clear SM_SEI_CAUSE */ 9035 WREG32(sei_cause_addr, 0); 9036 } 9037 9038 /* CQ interrupt */ 9039 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) { 9040 cq_intr_queue_index = 9041 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK, 9042 cq_intr_val); 9043 9044 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n", 9045 sm_index, cq_intr_queue_index); 9046 error_count++; 9047 9048 /* Clear CQ_INTR */ 9049 WREG32(cq_intr_addr, 0); 9050 } 9051 9052 hl_check_for_glbl_errors(hdev); 9053 9054 return error_count; 9055 } 9056 9057 static u64 get_hmmu_base(u16 event_type) 9058 { 9059 u8 dcore, index_in_dcore; 9060 9061 switch (event_type) { 9062 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP: 9063 case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR: 9064 dcore = 0; 9065 index_in_dcore = 0; 9066 break; 9067 case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP: 9068 case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR: 9069 dcore = 1; 9070 index_in_dcore = 0; 9071 break; 9072 case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP: 9073 case GAUDI2_EVENT_HMMU2_SPI_BASE ... 
GAUDI2_EVENT_HMMU2_SECURITY_ERROR: 9074 dcore = 0; 9075 index_in_dcore = 1; 9076 break; 9077 case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP: 9078 case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR: 9079 dcore = 1; 9080 index_in_dcore = 1; 9081 break; 9082 case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP: 9083 case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR: 9084 dcore = 3; 9085 index_in_dcore = 2; 9086 break; 9087 case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP: 9088 case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR: 9089 dcore = 2; 9090 index_in_dcore = 2; 9091 break; 9092 case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP: 9093 case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR: 9094 dcore = 3; 9095 index_in_dcore = 3; 9096 break; 9097 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP: 9098 case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR: 9099 dcore = 2; 9100 index_in_dcore = 3; 9101 break; 9102 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP: 9103 case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR: 9104 dcore = 0; 9105 index_in_dcore = 2; 9106 break; 9107 case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP: 9108 case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR: 9109 dcore = 1; 9110 index_in_dcore = 2; 9111 break; 9112 case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP: 9113 case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR: 9114 dcore = 0; 9115 index_in_dcore = 3; 9116 break; 9117 case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP: 9118 case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR: 9119 dcore = 1; 9120 index_in_dcore = 3; 9121 break; 9122 case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 9123 case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 9124 dcore = 3; 9125 index_in_dcore = 0; 9126 break; 9127 case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP: 9128 case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR: 9129 dcore = 2; 9130 index_in_dcore = 0; 9131 break; 9132 case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP: 9133 case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR: 9134 dcore = 3; 9135 index_in_dcore = 1; 9136 break; 9137 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP: 9138 case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR: 9139 dcore = 2; 9140 index_in_dcore = 1; 9141 break; 9142 default: 9143 return ULONG_MAX; 9144 } 9145 9146 return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET; 9147 } 9148 9149 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 9150 { 9151 bool is_pmmu = false; 9152 u32 error_count = 0; 9153 u64 mmu_base; 9154 9155 switch (event_type) { 9156 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 9157 case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 9158 mmu_base = get_hmmu_base(event_type); 9159 break; 9160 9161 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... 
GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 9162 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 9163 is_pmmu = true; 9164 mmu_base = mmPMMU_HBW_MMU_BASE; 9165 break; 9166 default: 9167 return 0; 9168 } 9169 9170 if (mmu_base == ULONG_MAX) 9171 return 0; 9172 9173 error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base, 9174 is_pmmu, event_mask); 9175 hl_check_for_glbl_errors(hdev); 9176 9177 return error_count; 9178 } 9179 9180 9181 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */ 9182 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, 9183 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt) 9184 { 9185 u32 addr, beat, beat_shift; 9186 bool rc = false; 9187 9188 dev_err_ratelimited(hdev->dev, 9189 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n", 9190 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt), 9191 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt), 9192 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt)); 9193 9194 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val); 9195 dev_err_ratelimited(hdev->dev, 9196 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n", 9197 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr), 9198 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr), 9199 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr), 9200 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr), 9201 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr)); 9202 9203 /* For each beat (RDQS edge), look for possible errors and print relevant info */ 9204 for (beat = 0 ; beat < 4 ; beat++) { 9205 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 9206 (HBM_RD_ERR_SERR_BEAT0_MASK << beat)) 9207 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n", 9208 beat, 9209 le32_to_cpu(rd_err_data->dbg_rd_err_dm), 9210 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome)); 9211 9212 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 9213 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) { 9214 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n", 9215 beat, 9216 le32_to_cpu(rd_err_data->dbg_rd_err_dm), 9217 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome)); 9218 rc |= true; 9219 } 9220 9221 beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT; 9222 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 9223 (HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) { 9224 dev_err_ratelimited(hdev->dev, 9225 "Beat%d read PARITY: DM: %#x, PAR data: %#x\n", 9226 beat, 9227 le32_to_cpu(rd_err_data->dbg_rd_err_dm), 9228 (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 9229 (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >> 9230 (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift)); 9231 rc |= true; 9232 } 9233 9234 dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat); 9235 dev_err_ratelimited(hdev->dev, "\t0x%08x\n", 9236 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2])); 9237 dev_err_ratelimited(hdev->dev, "\t0x%08x\n", 9238 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1])); 9239 } 9240 9241 return rc; 9242 } 9243 9244 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev, 9245 struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt) 9246 { 9247 struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds; 9248 u32 i, curr_addr, derr = wr_par_err_data->dbg_derr; 9249 9250 dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt); 9251 9252 dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n", 9253 derr & 0x3, derr & 0xc); 9254 9255 /* JIRA H6-3286 - the following prints may not be valid */ 9256 
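    /* Descriptive note (added): dump the LIFO of last latched write command
     * addresses; each entry is decoded into its SID/BG/BA/COL fields below.
     */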
dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n"); 9257 for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) { 9258 curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr); 9259 dev_err_ratelimited(hdev->dev, 9260 "\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n", 9261 i, 9262 FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr), 9263 FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr), 9264 FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr), 9265 FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr)); 9266 } 9267 } 9268 9269 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev, 9270 struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt) 9271 { 9272 __le32 *col_cmd = ca_par_err_data->dbg_col; 9273 __le16 *row_cmd = ca_par_err_data->dbg_row; 9274 u32 i; 9275 9276 dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt); 9277 9278 dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n"); 9279 for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++) 9280 dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i, 9281 le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0), 9282 le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0)); 9283 } 9284 9285 /* Returns true if hard reset is needed or false otherwise */ 9286 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type, 9287 struct hl_eq_hbm_sei_data *sei_data) 9288 { 9289 bool require_hard_reset = false; 9290 u32 hbm_id, mc_id, cause_idx; 9291 9292 hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4; 9293 mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2; 9294 9295 cause_idx = sei_data->hdr.sei_cause; 9296 if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) { 9297 gaudi2_print_event(hdev, event_type, true, 9298 "err cause: %s", 9299 "Invalid HBM SEI event cause (%d) provided by FW", cause_idx); 9300 return true; 9301 } 9302 9303 gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical, 9304 "System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s", 9305 sei_data->hdr.is_critical ? "Critical" : "Non-critical", 9306 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, 9307 hbm_mc_sei_cause[cause_idx]); 9308 9309 /* Print error-specific info */ 9310 switch (cause_idx) { 9311 case HBM_SEI_CATTRIP: 9312 require_hard_reset = true; 9313 break; 9314 9315 case HBM_SEI_CMD_PARITY_EVEN: 9316 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info, 9317 le32_to_cpu(sei_data->hdr.cnt)); 9318 require_hard_reset = true; 9319 break; 9320 9321 case HBM_SEI_CMD_PARITY_ODD: 9322 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info, 9323 le32_to_cpu(sei_data->hdr.cnt)); 9324 require_hard_reset = true; 9325 break; 9326 9327 case HBM_SEI_WRITE_DATA_PARITY_ERR: 9328 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info, 9329 le32_to_cpu(sei_data->hdr.cnt)); 9330 require_hard_reset = true; 9331 break; 9332 9333 case HBM_SEI_READ_ERR: 9334 /* Unlike other SEI events, read error requires further processing of the 9335 * raw data in order to determine the root cause. 
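         * Note (added): gaudi2_hbm_sei_handle_read_err() returns true (hard
         * reset required) only for ECC DERR or read parity hits; an ECC SERR
         * alone does not force a reset.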
9336 */ 9337 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev, 9338 &sei_data->read_err_info, 9339 le32_to_cpu(sei_data->hdr.cnt)); 9340 break; 9341 9342 default: 9343 break; 9344 } 9345 9346 require_hard_reset |= !!sei_data->hdr.is_critical; 9347 9348 return require_hard_reset; 9349 } 9350 9351 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type, 9352 u64 intr_cause_data) 9353 { 9354 if (intr_cause_data) { 9355 gaudi2_print_event(hdev, event_type, true, 9356 "temperature error cause: %#llx", intr_cause_data); 9357 return 1; 9358 } 9359 9360 return 0; 9361 } 9362 9363 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data) 9364 { 9365 u32 i, error_count = 0; 9366 9367 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++) 9368 if (intr_cause_data & hbm_mc_spi[i].mask) { 9369 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n", 9370 hbm_mc_spi[i].cause); 9371 error_count++; 9372 } 9373 9374 return error_count; 9375 } 9376 9377 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 9378 { 9379 ktime_t zero_time = ktime_set(0, 0); 9380 9381 mutex_lock(&hdev->clk_throttling.lock); 9382 9383 switch (event_type) { 9384 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 9385 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 9386 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 9387 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 9388 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 9389 dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); 9390 break; 9391 9392 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 9393 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 9394 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 9395 dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); 9396 break; 9397 9398 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 9399 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 9400 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 9401 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 9402 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 9403 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9404 dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n"); 9405 break; 9406 9407 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 9408 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 9409 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 9410 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9411 dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); 9412 break; 9413 9414 default: 9415 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type); 9416 break; 9417 } 9418 9419 mutex_unlock(&hdev->clk_throttling.lock); 9420 } 9421 9422 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type, 9423 struct cpucp_pkt_sync_err *sync_err) 9424 { 9425 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 9426 9427 gaudi2_print_event(hdev, event_type, false, 9428 "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d", 9429 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), 9430 q->pi, atomic_read(&q->ci)); 9431 } 9432 9433 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type) 9434 { 9435 u32 p2p_intr, 
msix_gw_intr, error_count = 0; 9436 9437 p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR); 9438 msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR); 9439 9440 if (p2p_intr) { 9441 gaudi2_print_event(hdev, event_type, true, 9442 "pcie p2p transaction terminated due to security, req_id(0x%x)", 9443 RREG32(mmPCIE_WRAP_P2P_REQ_ID)); 9444 9445 WREG32(mmPCIE_WRAP_P2P_INTR, 0x1); 9446 error_count++; 9447 } 9448 9449 if (msix_gw_intr) { 9450 gaudi2_print_event(hdev, event_type, true, 9451 "pcie msi-x gen denied due to vector num check failure, vec(0x%X)", 9452 RREG32(mmPCIE_WRAP_MSIX_GW_VEC)); 9453 9454 WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1); 9455 error_count++; 9456 } 9457 9458 return error_count; 9459 } 9460 9461 static int gaudi2_handle_pcie_drain(struct hl_device *hdev, 9462 struct hl_eq_pcie_drain_ind_data *drain_data) 9463 { 9464 u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0; 9465 9466 cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data); 9467 lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw); 9468 lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw); 9469 hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw); 9470 hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw); 9471 9472 if (cause & BIT_ULL(0)) { 9473 dev_err_ratelimited(hdev->dev, 9474 "PCIE AXI drain LBW completed, read_err %u, write_err %u\n", 9475 !!lbw_rd, !!lbw_wr); 9476 error_count++; 9477 } 9478 9479 if (cause & BIT_ULL(1)) { 9480 dev_err_ratelimited(hdev->dev, 9481 "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n", 9482 hbw_rd, hbw_wr); 9483 error_count++; 9484 } 9485 9486 return error_count; 9487 } 9488 9489 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data) 9490 { 9491 u32 error_count = 0; 9492 int i; 9493 9494 for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) { 9495 if (intr_cause_data & BIT_ULL(i)) { 9496 dev_err_ratelimited(hdev->dev, "PSOC %s completed\n", 9497 gaudi2_psoc_axi_drain_interrupts_cause[i]); 9498 error_count++; 9499 } 9500 } 9501 9502 hl_check_for_glbl_errors(hdev); 9503 9504 return error_count; 9505 } 9506 9507 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type, 9508 struct cpucp_pkt_sync_err *sync_err) 9509 { 9510 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 9511 9512 gaudi2_print_event(hdev, event_type, false, 9513 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d", 9514 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 9515 } 9516 9517 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type, 9518 struct hl_eq_engine_arc_intr_data *data) 9519 { 9520 struct hl_engine_arc_dccm_queue_full_irq *q; 9521 u32 intr_type, engine_id; 9522 u64 payload; 9523 9524 intr_type = le32_to_cpu(data->intr_type); 9525 engine_id = le32_to_cpu(data->engine_id); 9526 payload = le64_to_cpu(data->payload); 9527 9528 switch (intr_type) { 9529 case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ: 9530 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload; 9531 9532 gaudi2_print_event(hdev, event_type, true, 9533 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u", 9534 engine_id, intr_type, q->queue_index); 9535 return 1; 9536 default: 9537 gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type"); 9538 return 0; 9539 } 9540 } 9541 9542 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 9543 { 9544 struct gaudi2_device *gaudi2 = hdev->asic_specific; 9545 bool reset_required = false, is_critical = false; 9546 u32 index, 
ctl, reset_flags = 0, error_count = 0; 9547 u64 event_mask = 0; 9548 u16 event_type; 9549 9550 ctl = le32_to_cpu(eq_entry->hdr.ctl); 9551 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT); 9552 9553 if (event_type >= GAUDI2_EVENT_SIZE) { 9554 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 9555 event_type, GAUDI2_EVENT_SIZE - 1); 9556 return; 9557 } 9558 9559 gaudi2->events_stat[event_type]++; 9560 gaudi2->events_stat_aggregate[event_type]++; 9561 9562 switch (event_type) { 9563 case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR: 9564 fallthrough; 9565 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR: 9566 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9567 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9568 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 9569 is_critical = eq_entry->ecc_data.is_critical; 9570 error_count++; 9571 break; 9572 9573 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM: 9574 fallthrough; 9575 case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM: 9576 fallthrough; 9577 case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1: 9578 error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask); 9579 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9580 break; 9581 9582 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0: 9583 error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type); 9584 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9585 break; 9586 9587 case GAUDI2_EVENT_CPU_AXI_ERR_RSP: 9588 error_count = gaudi2_handle_cpu_sei_err(hdev, event_type); 9589 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9590 event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR; 9591 break; 9592 9593 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 9594 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 9595 error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask); 9596 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9597 break; 9598 9599 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 9600 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 9601 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 9602 error_count = gaudi2_handle_rot_err(hdev, index, event_type, 9603 &eq_entry->razwi_with_intr_cause, &event_mask); 9604 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask); 9605 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9606 break; 9607 9608 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 9609 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 9610 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type, 9611 &eq_entry->razwi_with_intr_cause, &event_mask); 9612 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask); 9613 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9614 break; 9615 9616 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... 
GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
					&eq_entry->razwi_with_intr_cause, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_DEC0_SPI:
	case GAUDI2_EVENT_DEC1_SPI:
	case GAUDI2_EVENT_DEC2_SPI:
	case GAUDI2_EVENT_DEC3_SPI:
	case GAUDI2_EVENT_DEC4_SPI:
	case GAUDI2_EVENT_DEC5_SPI:
	case GAUDI2_EVENT_DEC6_SPI:
	case GAUDI2_EVENT_DEC7_SPI:
	case GAUDI2_EVENT_DEC8_SPI:
	case GAUDI2_EVENT_DEC9_SPI:
		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
			(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
			(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
				GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
			(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
				GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID - 9699 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID); 9700 error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask); 9701 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9702 break; 9703 9704 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: 9705 case GAUDI2_EVENT_KDMA0_CORE: 9706 error_count = gaudi2_handle_kdma_core_event(hdev, event_type, 9707 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9708 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9709 break; 9710 9711 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE: 9712 error_count = gaudi2_handle_dma_core_event(hdev, event_type, 9713 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9714 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9715 break; 9716 9717 case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE: 9718 error_count = gaudi2_handle_dma_core_event(hdev, event_type, 9719 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9720 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9721 break; 9722 9723 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR: 9724 error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type, 9725 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask); 9726 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9727 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9728 break; 9729 9730 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 9731 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 9732 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 9733 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 9734 error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask); 9735 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9736 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9737 break; 9738 9739 case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL: 9740 error_count = gaudi2_handle_hif_fatal(hdev, event_type, 9741 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9742 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9743 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9744 break; 9745 9746 case GAUDI2_EVENT_PMMU_FATAL_0: 9747 error_count = gaudi2_handle_pif_fatal(hdev, event_type, 9748 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9749 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9750 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9751 break; 9752 9753 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT: 9754 error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask); 9755 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9756 break; 9757 9758 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE: 9759 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9760 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { 9761 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9762 reset_required = true; 9763 } 9764 error_count++; 9765 break; 9766 9767 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5: 9768 error_count = gaudi2_handle_hbm_cattrip(hdev, event_type, 9769 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9770 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9771 break; 9772 9773 case GAUDI2_EVENT_HBM0_MC0_SPI ... 
GAUDI2_EVENT_HBM5_MC1_SPI: 9774 error_count = gaudi2_handle_hbm_mc_spi(hdev, 9775 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9776 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9777 break; 9778 9779 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE: 9780 error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); 9781 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9782 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9783 break; 9784 9785 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: 9786 error_count = gaudi2_handle_psoc_drain(hdev, 9787 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9788 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9789 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9790 break; 9791 9792 case GAUDI2_EVENT_CPU_AXI_ECC: 9793 error_count = GAUDI2_NA_EVENT_CAUSE; 9794 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9795 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9796 break; 9797 case GAUDI2_EVENT_CPU_L2_RAM_ECC: 9798 error_count = GAUDI2_NA_EVENT_CAUSE; 9799 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9800 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9801 break; 9802 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP: 9803 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: 9804 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP: 9805 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP: 9806 error_count = gaudi2_handle_mme_sbte_err(hdev, event_type, 9807 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9808 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9809 break; 9810 case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B: 9811 error_count = GAUDI2_NA_EVENT_CAUSE; 9812 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9813 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9814 break; 9815 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP: 9816 error_count = GAUDI2_NA_EVENT_CAUSE; 9817 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9818 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9819 break; 9820 case GAUDI2_EVENT_PSOC_PRSTN_FALL: 9821 error_count = GAUDI2_NA_EVENT_CAUSE; 9822 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9823 break; 9824 case GAUDI2_EVENT_PCIE_APB_TIMEOUT: 9825 error_count = GAUDI2_NA_EVENT_CAUSE; 9826 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9827 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9828 break; 9829 case GAUDI2_EVENT_PCIE_FATAL_ERR: 9830 error_count = GAUDI2_NA_EVENT_CAUSE; 9831 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9832 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9833 break; 9834 case GAUDI2_EVENT_TPC0_BMON_SPMU: 9835 case GAUDI2_EVENT_TPC1_BMON_SPMU: 9836 case GAUDI2_EVENT_TPC2_BMON_SPMU: 9837 case GAUDI2_EVENT_TPC3_BMON_SPMU: 9838 case GAUDI2_EVENT_TPC4_BMON_SPMU: 9839 case GAUDI2_EVENT_TPC5_BMON_SPMU: 9840 case GAUDI2_EVENT_TPC6_BMON_SPMU: 9841 case GAUDI2_EVENT_TPC7_BMON_SPMU: 9842 case GAUDI2_EVENT_TPC8_BMON_SPMU: 9843 case GAUDI2_EVENT_TPC9_BMON_SPMU: 9844 case GAUDI2_EVENT_TPC10_BMON_SPMU: 9845 case GAUDI2_EVENT_TPC11_BMON_SPMU: 9846 case GAUDI2_EVENT_TPC12_BMON_SPMU: 9847 case GAUDI2_EVENT_TPC13_BMON_SPMU: 9848 case GAUDI2_EVENT_TPC14_BMON_SPMU: 9849 case GAUDI2_EVENT_TPC15_BMON_SPMU: 9850 case GAUDI2_EVENT_TPC16_BMON_SPMU: 9851 case GAUDI2_EVENT_TPC17_BMON_SPMU: 9852 case GAUDI2_EVENT_TPC18_BMON_SPMU: 9853 case GAUDI2_EVENT_TPC19_BMON_SPMU: 9854 case GAUDI2_EVENT_TPC20_BMON_SPMU: 9855 case GAUDI2_EVENT_TPC21_BMON_SPMU: 9856 case GAUDI2_EVENT_TPC22_BMON_SPMU: 9857 case GAUDI2_EVENT_TPC23_BMON_SPMU: 9858 case 
GAUDI2_EVENT_TPC24_BMON_SPMU: 9859 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU: 9860 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU: 9861 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU: 9862 case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU: 9863 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU: 9864 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU: 9865 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU: 9866 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU: 9867 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU: 9868 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU: 9869 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU: 9870 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU: 9871 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU: 9872 fallthrough; 9873 case GAUDI2_EVENT_DEC0_BMON_SPMU: 9874 case GAUDI2_EVENT_DEC1_BMON_SPMU: 9875 case GAUDI2_EVENT_DEC2_BMON_SPMU: 9876 case GAUDI2_EVENT_DEC3_BMON_SPMU: 9877 case GAUDI2_EVENT_DEC4_BMON_SPMU: 9878 case GAUDI2_EVENT_DEC5_BMON_SPMU: 9879 case GAUDI2_EVENT_DEC6_BMON_SPMU: 9880 case GAUDI2_EVENT_DEC7_BMON_SPMU: 9881 case GAUDI2_EVENT_DEC8_BMON_SPMU: 9882 case GAUDI2_EVENT_DEC9_BMON_SPMU: 9883 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU: 9884 error_count = GAUDI2_NA_EVENT_CAUSE; 9885 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9886 break; 9887 9888 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 9889 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 9890 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 9891 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 9892 gaudi2_print_clk_change_info(hdev, event_type, &event_mask); 9893 error_count = GAUDI2_NA_EVENT_CAUSE; 9894 break; 9895 9896 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC: 9897 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err); 9898 error_count = GAUDI2_NA_EVENT_CAUSE; 9899 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9900 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9901 break; 9902 9903 case GAUDI2_EVENT_PCIE_FLR_REQUESTED: 9904 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9905 error_count = GAUDI2_NA_EVENT_CAUSE; 9906 /* Do nothing- FW will handle it */ 9907 break; 9908 9909 case GAUDI2_EVENT_PCIE_P2P_MSIX: 9910 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type); 9911 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9912 break; 9913 9914 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE: 9915 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE; 9916 error_count = gaudi2_handle_sm_err(hdev, event_type, index); 9917 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9918 break; 9919 9920 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... 
GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR: 9921 error_count = GAUDI2_NA_EVENT_CAUSE; 9922 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9923 break; 9924 9925 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: 9926 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n", 9927 le64_to_cpu(eq_entry->data[0])); 9928 error_count = GAUDI2_NA_EVENT_CAUSE; 9929 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9930 break; 9931 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT: 9932 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n", 9933 le64_to_cpu(eq_entry->data[0])); 9934 error_count = GAUDI2_NA_EVENT_CAUSE; 9935 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9936 break; 9937 9938 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED: 9939 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err); 9940 error_count = GAUDI2_NA_EVENT_CAUSE; 9941 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9942 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9943 break; 9944 9945 case GAUDI2_EVENT_ARC_DCCM_FULL: 9946 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data); 9947 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9948 break; 9949 9950 case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED: 9951 case GAUDI2_EVENT_CPU_DEV_RESET_REQ: 9952 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9953 error_count = GAUDI2_NA_EVENT_CAUSE; 9954 is_critical = true; 9955 break; 9956 9957 default: 9958 if (gaudi2_irq_map_table[event_type].valid) { 9959 dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n", 9960 event_type); 9961 error_count = GAUDI2_NA_EVENT_CAUSE; 9962 } 9963 } 9964 9965 /* Make sure to dump an error in case no error cause was printed so far. 9966 * Note that although we have counted the errors, we use this number as 9967 * a boolean. 9968 */ 9969 if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type)) 9970 gaudi2_print_event(hdev, event_type, true, "%d", event_type); 9971 else if (error_count == 0) 9972 gaudi2_print_event(hdev, event_type, true, 9973 "No error cause for H/W event %u", event_type); 9974 9975 if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || 9976 reset_required) { 9977 if (reset_required || 9978 (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD)) 9979 reset_flags |= HL_DRV_RESET_HARD; 9980 9981 if (hdev->hard_reset_on_fw_events || 9982 (hdev->asic_prop.fw_security_enabled && is_critical)) 9983 goto reset_device; 9984 } 9985 9986 /* Send unmask irq only for interrupts not classified as MSG */ 9987 if (!gaudi2_irq_map_table[event_type].msg) 9988 hl_fw_unmask_irq(hdev, event_type); 9989 9990 if (event_mask) 9991 hl_notifier_event_send_all(hdev, event_mask); 9992 9993 return; 9994 9995 reset_device: 9996 if (hdev->asic_prop.fw_security_enabled && is_critical) { 9997 reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW; 9998 event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; 9999 } else { 10000 reset_flags |= HL_DRV_RESET_DELAY; 10001 } 10002 /* escalate general hw errors to critical/fatal error */ 10003 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) 10004 hl_handle_critical_hw_err(hdev, event_type, &event_mask); 10005 10006 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 10007 hl_device_cond_reset(hdev, reset_flags, event_mask); 10008 } 10009 10010 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev, 10011 struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr, 10012 u32 hw_queue_id, u32 size, u64 addr, u32 val) 10013 { 10014 u32 ctl, pkt_size; 10015 int rc = 0; 10016 10017 ctl = 
FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	pkt_size = sizeof(struct packet_lin_dma);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
	if (rc)
		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
				hw_queue_id);

	return rc;
}

static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
{
	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	void *lin_dma_pkts_arr;
	dma_addr_t pkt_dma_addr;
	int rc = 0, dma_num = 0;

	if (prop->edma_enabled_mask == 0) {
		dev_info(hdev->dev, "none of the EDMA engines is enabled - skip dram scrubbing\n");
		return -EIO;
	}

	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	comp_addr = CFG_BASE + sob_addr;
	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);

	/* Calculate how many lin dma pkts we'll need */
	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
	pkt_size = sizeof(struct packet_lin_dma);

	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
						&pkt_dma_addr, GFP_KERNEL);
	if (!lin_dma_pkts_arr)
		return -ENOMEM;

	/*
	 * set mmu bypass for the scrubbing - all EDMAs are configured the same so save
	 * only the first one to restore later
	 * also set the sob addr for all edma cores for completion.
	 * set QM as trusted to allow it to access physical address with MMU bp.
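	 *
	 * Completion scheme (descriptive note): each enabled EDMA core is configured to
	 * write a +1 increment to the chosen SOB when a lin-dma packet completes
	 * (comp_val above), so once the SOB value equals the number of submitted packets
	 * (dma_num) the whole memset is known to be done. All of these settings are
	 * restored after the scrub.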
10078 */ 10079 old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP); 10080 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 10081 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 10082 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 10083 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 10084 10085 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 10086 continue; 10087 10088 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + 10089 edma_offset, mmubp); 10090 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 10091 lower_32_bits(comp_addr)); 10092 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 10093 upper_32_bits(comp_addr)); 10094 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 10095 comp_val); 10096 gaudi2_qman_set_test_mode(hdev, 10097 edma_queues_id[dcore] + 4 * edma_idx, true); 10098 } 10099 } 10100 10101 WREG32(sob_addr, 0); 10102 10103 while (cur_addr < end_addr) { 10104 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 10105 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 10106 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 10107 10108 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 10109 continue; 10110 10111 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr); 10112 10113 rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev, 10114 (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num, 10115 pkt_dma_addr + dma_num * pkt_size, 10116 edma_queues_id[dcore] + edma_idx * 4, 10117 chunk_size, cur_addr, val); 10118 if (rc) 10119 goto end; 10120 10121 dma_num++; 10122 cur_addr += chunk_size; 10123 if (cur_addr == end_addr) 10124 break; 10125 } 10126 } 10127 } 10128 10129 rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000); 10130 if (rc) { 10131 dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n"); 10132 goto end; 10133 } 10134 end: 10135 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 10136 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 10137 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 10138 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 10139 10140 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 10141 continue; 10142 10143 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp); 10144 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0); 10145 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0); 10146 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0); 10147 gaudi2_qman_set_test_mode(hdev, 10148 edma_queues_id[dcore] + 4 * edma_idx, false); 10149 } 10150 } 10151 10152 WREG32(sob_addr, 0); 10153 hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr); 10154 10155 return rc; 10156 } 10157 10158 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val) 10159 { 10160 int rc; 10161 struct asic_fixed_properties *prop = &hdev->asic_prop; 10162 u64 size = prop->dram_end_address - prop->dram_user_base_address; 10163 10164 rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val); 10165 10166 if (rc) 10167 dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n", 10168 prop->dram_user_base_address, size); 10169 return rc; 10170 } 10171 10172 static int gaudi2_scrub_device_mem(struct hl_device *hdev) 10173 { 10174 int rc; 10175 struct asic_fixed_properties *prop = &hdev->asic_prop; 10176 u64 val = hdev->memory_scrub_val; 10177 u64 addr, size; 10178 10179 
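	/*
	 * Scrub flow (descriptive note): scrubbing is skipped unless hdev->memory_scrub
	 * is set. Otherwise the user SRAM range is scrubbed first (a reduced range when
	 * running on PLDM), and then the entire user DRAM range is scrubbed with the
	 * same value through the enabled EDMA engines.
	 */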
if (!hdev->memory_scrub) 10180 return 0; 10181 10182 /* scrub SRAM */ 10183 addr = prop->sram_user_base_address; 10184 size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET); 10185 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n", 10186 addr, addr + size, val); 10187 rc = gaudi2_memset_device_memory(hdev, addr, size, val); 10188 if (rc) { 10189 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc); 10190 return rc; 10191 } 10192 10193 /* scrub DRAM */ 10194 rc = gaudi2_scrub_device_dram(hdev, val); 10195 if (rc) { 10196 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc); 10197 return rc; 10198 } 10199 return 0; 10200 } 10201 10202 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev) 10203 { 10204 u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr, 10205 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr; 10206 u32 val, size, offset; 10207 int dcore_id; 10208 10209 offset = hdev->asic_prop.first_available_cq[0] * 4; 10210 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset; 10211 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset; 10212 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset; 10213 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset; 10214 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset; 10215 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset; 10216 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - 10217 (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset); 10218 10219 /* memset dcore0 CQ registers */ 10220 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 10221 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 10222 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 10223 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 10224 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 10225 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 10226 10227 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET; 10228 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET; 10229 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET; 10230 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET; 10231 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET; 10232 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET; 10233 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0; 10234 10235 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 10236 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 10237 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 10238 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 10239 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 10240 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 10241 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 10242 10243 cq_lbw_l_addr += DCORE_OFFSET; 10244 cq_lbw_h_addr += DCORE_OFFSET; 10245 cq_lbw_data_addr += DCORE_OFFSET; 10246 cq_base_l_addr += DCORE_OFFSET; 10247 cq_base_h_addr += DCORE_OFFSET; 10248 cq_size_addr += DCORE_OFFSET; 10249 } 10250 10251 offset = hdev->asic_prop.first_available_user_mon[0] * 4; 10252 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset; 10253 val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT; 10254 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset); 10255 10256 /* memset dcore0 monitors */ 10257 
gaudi2_memset_device_lbw(hdev, addr, size, val); 10258 10259 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset; 10260 gaudi2_memset_device_lbw(hdev, addr, size, 0); 10261 10262 mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET; 10263 mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET; 10264 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0; 10265 10266 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 10267 gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val); 10268 gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0); 10269 mon_sts_addr += DCORE_OFFSET; 10270 mon_cfg_addr += DCORE_OFFSET; 10271 } 10272 10273 offset = hdev->asic_prop.first_available_user_sob[0] * 4; 10274 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset; 10275 val = 0; 10276 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - 10277 (mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset); 10278 10279 /* memset dcore0 sobs */ 10280 gaudi2_memset_device_lbw(hdev, addr, size, val); 10281 10282 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET; 10283 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0; 10284 10285 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 10286 gaudi2_memset_device_lbw(hdev, addr, size, val); 10287 addr += DCORE_OFFSET; 10288 } 10289 10290 /* Flush all WREG to prevent race */ 10291 val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset); 10292 } 10293 10294 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev) 10295 { 10296 u32 reg_base, hw_queue_id; 10297 10298 for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0; 10299 hw_queue_id += NUM_OF_PQ_PER_QMAN) { 10300 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id)) 10301 continue; 10302 10303 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false); 10304 10305 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 10306 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0); 10307 } 10308 10309 /* Flush all WREG to prevent race */ 10310 RREG32(mmPDMA0_QM_ARB_CFG_0); 10311 } 10312 10313 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev) 10314 { 10315 u32 reg_base, hw_queue_id; 10316 10317 for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3; 10318 hw_queue_id += NUM_OF_PQ_PER_QMAN) { 10319 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id)) 10320 continue; 10321 10322 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false); 10323 10324 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 10325 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0); 10326 } 10327 10328 /* Flush all WREG to prevent race */ 10329 RREG32(mmPDMA0_QM_ARB_CFG_0); 10330 } 10331 10332 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid) 10333 { 10334 return 0; 10335 } 10336 10337 static void gaudi2_restore_phase_topology(struct hl_device *hdev) 10338 { 10339 } 10340 10341 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx, 10342 struct dup_block_ctx *cfg_ctx) 10343 { 10344 u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off; 10345 u8 seq; 10346 int i; 10347 10348 for (i = 0 ; i < cfg_ctx->instances ; i++) { 10349 seq = block_idx * cfg_ctx->instances + i; 10350 10351 /* skip disabled instance */ 10352 if (!(cfg_ctx->enabled_mask & BIT_ULL(seq))) 10353 continue; 10354 10355 cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off, 10356 cfg_ctx->data); 10357 } 10358 } 10359 10360 static void gaudi2_init_blocks_with_mask(struct hl_device 
*hdev, struct dup_block_ctx *cfg_ctx, 10361 u64 mask) 10362 { 10363 int i; 10364 10365 cfg_ctx->enabled_mask = mask; 10366 10367 for (i = 0 ; i < cfg_ctx->blocks ; i++) 10368 gaudi2_init_block_instances(hdev, i, cfg_ctx); 10369 } 10370 10371 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx) 10372 { 10373 gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX); 10374 } 10375 10376 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr) 10377 { 10378 void *host_mem_virtual_addr; 10379 dma_addr_t host_mem_dma_addr; 10380 u64 reserved_va_base; 10381 u32 pos, size_left, size_to_dma; 10382 struct hl_ctx *ctx; 10383 int rc = 0; 10384 10385 /* Fetch the ctx */ 10386 ctx = hl_get_compute_ctx(hdev); 10387 if (!ctx) { 10388 dev_err(hdev->dev, "No ctx available\n"); 10389 return -EINVAL; 10390 } 10391 10392 /* Allocate buffers for read and for poll */ 10393 host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr, 10394 GFP_KERNEL | __GFP_ZERO); 10395 if (host_mem_virtual_addr == NULL) { 10396 dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n"); 10397 rc = -ENOMEM; 10398 goto put_ctx; 10399 } 10400 10401 /* Reserve VM region on asic side */ 10402 reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M, 10403 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 10404 if (!reserved_va_base) { 10405 dev_err(hdev->dev, "Failed to reserve vmem on asic\n"); 10406 rc = -ENOMEM; 10407 goto free_data_buffer; 10408 } 10409 10410 /* Create mapping on asic side */ 10411 mutex_lock(&hdev->mmu_lock); 10412 10413 rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M); 10414 if (rc) { 10415 dev_err(hdev->dev, "Failed to create mapping on asic mmu\n"); 10416 goto unreserve_va; 10417 } 10418 10419 rc = hl_mmu_invalidate_cache_range(hdev, false, 10420 MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV, 10421 ctx->asid, reserved_va_base, SZ_2M); 10422 if (rc) { 10423 hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); 10424 goto unreserve_va; 10425 } 10426 10427 mutex_unlock(&hdev->mmu_lock); 10428 10429 /* Enable MMU on KDMA */ 10430 gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid); 10431 10432 pos = 0; 10433 size_left = size; 10434 size_to_dma = SZ_2M; 10435 10436 while (size_left > 0) { 10437 if (size_left < SZ_2M) 10438 size_to_dma = size_left; 10439 10440 rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false); 10441 if (rc) 10442 break; 10443 10444 memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma); 10445 10446 if (size_left <= SZ_2M) 10447 break; 10448 10449 pos += SZ_2M; 10450 addr += SZ_2M; 10451 size_left -= SZ_2M; 10452 } 10453 10454 gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID); 10455 10456 mutex_lock(&hdev->mmu_lock); 10457 10458 rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); 10459 if (rc) 10460 goto unreserve_va; 10461 10462 rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR, 10463 ctx->asid, reserved_va_base, SZ_2M); 10464 10465 unreserve_va: 10466 mutex_unlock(&hdev->mmu_lock); 10467 hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M); 10468 free_data_buffer: 10469 hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr); 10470 put_ctx: 10471 hl_ctx_put(ctx); 10472 10473 return rc; 10474 } 10475 10476 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx) 10477 { 10478 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10479 int min_alloc_order, rc; 10480 
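	/*
	 * Internal CB pool setup sequence (descriptive note): allocate a host-coherent
	 * buffer, expose it through a gen_pool allocator, reserve a host VA block on the
	 * device side and map it contiguously via the MMU so the internal command buffers
	 * are reachable from the device. Each failure label below unwinds the prior steps.
	 */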
10481 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) 10482 return 0; 10483 10484 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 10485 HOST_SPACE_INTERNAL_CB_SZ, 10486 &hdev->internal_cb_pool_dma_addr, 10487 GFP_KERNEL | __GFP_ZERO); 10488 10489 if (!hdev->internal_cb_pool_virt_addr) 10490 return -ENOMEM; 10491 10492 min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev), 10493 gaudi2_get_wait_cb_size(hdev))); 10494 10495 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 10496 if (!hdev->internal_cb_pool) { 10497 dev_err(hdev->dev, "Failed to create internal CB pool\n"); 10498 rc = -ENOMEM; 10499 goto free_internal_cb_pool; 10500 } 10501 10502 rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr, 10503 HOST_SPACE_INTERNAL_CB_SZ, -1); 10504 if (rc) { 10505 dev_err(hdev->dev, "Failed to add memory to internal CB pool\n"); 10506 rc = -EFAULT; 10507 goto destroy_internal_cb_pool; 10508 } 10509 10510 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, 10511 HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 10512 10513 if (!hdev->internal_cb_va_base) { 10514 rc = -ENOMEM; 10515 goto destroy_internal_cb_pool; 10516 } 10517 10518 mutex_lock(&hdev->mmu_lock); 10519 10520 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, 10521 HOST_SPACE_INTERNAL_CB_SZ); 10522 if (rc) 10523 goto unreserve_internal_cb_pool; 10524 10525 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 10526 if (rc) 10527 goto unmap_internal_cb_pool; 10528 10529 mutex_unlock(&hdev->mmu_lock); 10530 10531 return 0; 10532 10533 unmap_internal_cb_pool: 10534 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 10535 unreserve_internal_cb_pool: 10536 mutex_unlock(&hdev->mmu_lock); 10537 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 10538 destroy_internal_cb_pool: 10539 gen_pool_destroy(hdev->internal_cb_pool); 10540 free_internal_cb_pool: 10541 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 10542 hdev->internal_cb_pool_dma_addr); 10543 10544 return rc; 10545 } 10546 10547 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx) 10548 { 10549 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10550 10551 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) 10552 return; 10553 10554 mutex_lock(&hdev->mmu_lock); 10555 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 10556 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 10557 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 10558 mutex_unlock(&hdev->mmu_lock); 10559 10560 gen_pool_destroy(hdev->internal_cb_pool); 10561 10562 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 10563 hdev->internal_cb_pool_dma_addr); 10564 } 10565 10566 static void gaudi2_restore_user_registers(struct hl_device *hdev) 10567 { 10568 gaudi2_restore_user_sm_registers(hdev); 10569 gaudi2_restore_user_qm_registers(hdev); 10570 } 10571 10572 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx) 10573 { 10574 struct hl_device *hdev = ctx->hdev; 10575 struct asic_fixed_properties *prop = &hdev->asic_prop; 10576 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10577 int rc; 10578 10579 rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START, 10580 
gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);

	return rc;
}

static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
				prop->pmmu.page_size, true);
	if (rc)
		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
}

static int gaudi2_ctx_init(struct hl_ctx *ctx)
{
	int rc;

	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
	if (rc)
		return rc;

	/* No need to clear user registers if the device has just performed a reset;
	 * we restore only the NIC QM registers
	 */
	if (ctx->hdev->reset_upon_device_release)
		gaudi2_restore_nic_qm_registers(ctx->hdev);
	else
		gaudi2_restore_user_registers(ctx->hdev);

	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
	if (rc)
		return rc;

	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
	if (rc)
		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	return rc;
}

static void gaudi2_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);

	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
}

static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	u32 mon_payload, sob_id, mon_id;

	if (!cs_needs_completion(cs))
		return 0;

	/*
	 * First 64 SOB/MON are reserved for driver for QMAN auto completion
	 * mechanism. Each SOB/MON pair is used for a pending CS with the same
	 * cyclic index. The SOB value is increased when each of the CS jobs is
	 * completed. When the SOB reaches the number of CS jobs, the monitor
	 * generates an MSI-X interrupt.
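	 *
	 * Illustrative example (assumed values): with max_pending_cs equal to the 64
	 * reserved pairs and a CS whose sequence number is 70, the cyclic index is
	 * 70 & 63 = 6, so SOB/MON pair 6 tracks this CS and the monitor payload below
	 * carries shadow index 6 with its VALID and READY bits set.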
10653 */ 10654 10655 sob_id = mon_id = index; 10656 mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) | 10657 (1 << CQ_ENTRY_READY_SHIFT) | index; 10658 10659 gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload, 10660 cs->jobs_cnt); 10661 10662 return 0; 10663 } 10664 10665 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 10666 { 10667 return HL_INVALID_QUEUE; 10668 } 10669 10670 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb) 10671 { 10672 struct hl_cb *cb = data; 10673 struct packet_msg_short *pkt; 10674 u32 value, ctl, pkt_size = sizeof(*pkt); 10675 10676 pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size); 10677 memset(pkt, 0, pkt_size); 10678 10679 /* Inc by 1, Mode ADD */ 10680 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 10681 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 10682 10683 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 10684 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */ 10685 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 10686 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb); 10687 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 10688 10689 pkt->value = cpu_to_le32(value); 10690 pkt->ctl = cpu_to_le32(ctl); 10691 10692 return size + pkt_size; 10693 } 10694 10695 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr) 10696 { 10697 u32 ctl, pkt_size = sizeof(*pkt); 10698 10699 memset(pkt, 0, pkt_size); 10700 10701 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr); 10702 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */ 10703 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 10704 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 10705 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0); 10706 10707 pkt->value = cpu_to_le32(value); 10708 pkt->ctl = cpu_to_le32(ctl); 10709 10710 return pkt_size; 10711 } 10712 10713 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt, 10714 u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr) 10715 { 10716 u32 ctl, value, pkt_size = sizeof(*pkt); 10717 u8 mask; 10718 10719 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 10720 dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask); 10721 return 0; 10722 } 10723 10724 memset(pkt, 0, pkt_size); 10725 10726 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 10727 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 10728 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL*/ 10729 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask); 10730 10731 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr); 10732 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */ 10733 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 10734 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 10735 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 10736 10737 pkt->value = cpu_to_le32(value); 10738 pkt->ctl = cpu_to_le32(ctl); 10739 10740 return pkt_size; 10741 } 10742 10743 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt) 10744 { 10745 u32 ctl, cfg, pkt_size = sizeof(*pkt); 10746 10747 memset(pkt, 0, pkt_size); 10748 10749 cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 10750 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 10751 cfg |= 
FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2); 10752 10753 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 10754 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 10755 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 10756 10757 pkt->cfg = cpu_to_le32(cfg); 10758 pkt->ctl = cpu_to_le32(ctl); 10759 10760 return pkt_size; 10761 } 10762 10763 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop) 10764 { 10765 struct hl_cb *cb = prop->data; 10766 void *buf = (void *) (uintptr_t) (cb->kernel_address); 10767 10768 u64 monitor_base, fence_addr = 0; 10769 u32 stream_index, size = prop->size; 10770 u16 msg_addr_offset; 10771 10772 stream_index = prop->q_idx % 4; 10773 fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] + 10774 QM_FENCE2_OFFSET + stream_index * 4; 10775 10776 /* 10777 * monitor_base should be the content of the base0 address registers, 10778 * so it will be added to the msg short offsets 10779 */ 10780 monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 10781 10782 /* First monitor config packet: low address of the sync */ 10783 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) - 10784 monitor_base; 10785 10786 size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset); 10787 10788 /* Second monitor config packet: high address of the sync */ 10789 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) - 10790 monitor_base; 10791 10792 size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset); 10793 10794 /* 10795 * Third monitor config packet: the payload, i.e. what to write when the 10796 * sync triggers 10797 */ 10798 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) - 10799 monitor_base; 10800 10801 size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset); 10802 10803 /* Fourth monitor config packet: bind the monitor to a sync object */ 10804 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base; 10805 10806 size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask, 10807 prop->sob_val, msg_addr_offset); 10808 10809 /* Fence packet */ 10810 size += gaudi2_add_fence_pkt(buf + size); 10811 10812 return size; 10813 } 10814 10815 static void gaudi2_reset_sob(struct hl_device *hdev, void *data) 10816 { 10817 struct hl_hw_sob *hw_sob = data; 10818 10819 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id); 10820 10821 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0); 10822 10823 kref_init(&hw_sob->kref); 10824 } 10825 10826 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group) 10827 { 10828 } 10829 10830 static u64 gaudi2_get_device_time(struct hl_device *hdev) 10831 { 10832 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32; 10833 10834 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL); 10835 } 10836 10837 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs) 10838 { 10839 return 0; 10840 } 10841 10842 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx, 10843 struct hl_cs *cs, u32 wait_queue_id, 10844 u32 collective_engine_id, u32 encaps_signal_offset) 10845 { 10846 return -EINVAL; 10847 } 10848 10849 /* 10850 * hl_mmu_scramble - converts a dram (non power of 2) page-size aligned address 10851 * to DMMU page-size address (64MB) before mapping it in 10852 * the MMU. 
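 * (Illustrative arithmetic, assuming 6 functional HBMs so the divisor is 48MB,
 * matching the formula and example table below: VA 0x9C000000 / 48M = 52 with
 * remainder 0, and 52 * 64M = 0xD0000000, i.e. SVA1 in the table below.)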
10853 * The operation is performed on both the virtual and physical addresses. 10854 * for device with 6 HBMs the scramble is: 10855 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48] 10856 * 10857 * Example: 10858 * ============================================================================= 10859 * Allocated DRAM Reserved VA scrambled VA for MMU mapping Scrambled PA 10860 * Phys address in MMU last 10861 * HOP 10862 * ============================================================================= 10863 * PA1 0x3000000 VA1 0x9C000000 SVA1= (VA1/48M)*64M 0xD0000000 <- PA1/48M 0x1 10864 * PA2 0x9000000 VA2 0x9F000000 SVA2= (VA2/48M)*64M 0xD4000000 <- PA2/48M 0x3 10865 * ============================================================================= 10866 */ 10867 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr) 10868 { 10869 struct asic_fixed_properties *prop = &hdev->asic_prop; 10870 u32 divisor, mod_va; 10871 u64 div_va; 10872 10873 /* accept any address in the DRAM address space */ 10874 if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE, 10875 VA_HBM_SPACE_END)) { 10876 10877 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE; 10878 div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va); 10879 return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) | 10880 (div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) | 10881 (mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT); 10882 } 10883 10884 return raw_addr; 10885 } 10886 10887 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr) 10888 { 10889 struct asic_fixed_properties *prop = &hdev->asic_prop; 10890 u32 divisor, mod_va; 10891 u64 div_va; 10892 10893 /* accept any address in the DRAM address space */ 10894 if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE, 10895 VA_HBM_SPACE_END)) { 10896 10897 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE; 10898 div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, 10899 PAGE_SIZE_64MB, &mod_va); 10900 10901 return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) + 10902 (div_va * divisor + mod_va)); 10903 } 10904 10905 return scrambled_addr; 10906 } 10907 10908 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id) 10909 { 10910 u32 base = 0, dcore_id, dec_id; 10911 10912 if (core_id >= NUMBER_OF_DEC) { 10913 dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id); 10914 goto out; 10915 } 10916 10917 if (core_id < 8) { 10918 dcore_id = core_id / NUM_OF_DEC_PER_DCORE; 10919 dec_id = core_id % NUM_OF_DEC_PER_DCORE; 10920 10921 base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET + 10922 dec_id * DCORE_VDEC_OFFSET; 10923 } else { 10924 /* PCIe Shared Decoder */ 10925 base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET); 10926 } 10927 out: 10928 return base; 10929 } 10930 10931 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr, 10932 u32 *block_size, u32 *block_id) 10933 { 10934 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10935 int i; 10936 10937 for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) { 10938 if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) { 10939 *block_id = i; 10940 if (block_size) 10941 *block_size = gaudi2->mapped_blocks[i].size; 10942 return 0; 10943 } 10944 } 10945 10946 dev_err(hdev->dev, "Invalid block address %#llx", block_addr); 10947 10948 return -EINVAL; 10949 } 10950 10951 static int gaudi2_block_mmap(struct hl_device *hdev, 
struct vm_area_struct *vma, 10952 u32 block_id, u32 block_size) 10953 { 10954 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10955 u64 offset_in_bar; 10956 u64 address; 10957 int rc; 10958 10959 if (block_id >= NUM_USER_MAPPED_BLOCKS) { 10960 dev_err(hdev->dev, "Invalid block id %u", block_id); 10961 return -EINVAL; 10962 } 10963 10964 /* we allow mapping only an entire block */ 10965 if (block_size != gaudi2->mapped_blocks[block_id].size) { 10966 dev_err(hdev->dev, "Invalid block size %u", block_size); 10967 return -EINVAL; 10968 } 10969 10970 offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR; 10971 10972 address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar; 10973 10974 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 10975 VM_DONTCOPY | VM_NORESERVE); 10976 10977 rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, 10978 block_size, vma->vm_page_prot); 10979 if (rc) 10980 dev_err(hdev->dev, "remap_pfn_range error %d", rc); 10981 10982 return rc; 10983 } 10984 10985 static void gaudi2_enable_events_from_fw(struct hl_device *hdev) 10986 { 10987 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10988 10989 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 10990 u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq); 10991 10992 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) 10993 WREG32(irq_handler_offset, 10994 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id); 10995 } 10996 10997 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base) 10998 { 10999 switch (mmu_id) { 11000 case HW_CAP_DCORE0_DMMU0: 11001 *mmu_base = mmDCORE0_HMMU0_MMU_BASE; 11002 break; 11003 case HW_CAP_DCORE0_DMMU1: 11004 *mmu_base = mmDCORE0_HMMU1_MMU_BASE; 11005 break; 11006 case HW_CAP_DCORE0_DMMU2: 11007 *mmu_base = mmDCORE0_HMMU2_MMU_BASE; 11008 break; 11009 case HW_CAP_DCORE0_DMMU3: 11010 *mmu_base = mmDCORE0_HMMU3_MMU_BASE; 11011 break; 11012 case HW_CAP_DCORE1_DMMU0: 11013 *mmu_base = mmDCORE1_HMMU0_MMU_BASE; 11014 break; 11015 case HW_CAP_DCORE1_DMMU1: 11016 *mmu_base = mmDCORE1_HMMU1_MMU_BASE; 11017 break; 11018 case HW_CAP_DCORE1_DMMU2: 11019 *mmu_base = mmDCORE1_HMMU2_MMU_BASE; 11020 break; 11021 case HW_CAP_DCORE1_DMMU3: 11022 *mmu_base = mmDCORE1_HMMU3_MMU_BASE; 11023 break; 11024 case HW_CAP_DCORE2_DMMU0: 11025 *mmu_base = mmDCORE2_HMMU0_MMU_BASE; 11026 break; 11027 case HW_CAP_DCORE2_DMMU1: 11028 *mmu_base = mmDCORE2_HMMU1_MMU_BASE; 11029 break; 11030 case HW_CAP_DCORE2_DMMU2: 11031 *mmu_base = mmDCORE2_HMMU2_MMU_BASE; 11032 break; 11033 case HW_CAP_DCORE2_DMMU3: 11034 *mmu_base = mmDCORE2_HMMU3_MMU_BASE; 11035 break; 11036 case HW_CAP_DCORE3_DMMU0: 11037 *mmu_base = mmDCORE3_HMMU0_MMU_BASE; 11038 break; 11039 case HW_CAP_DCORE3_DMMU1: 11040 *mmu_base = mmDCORE3_HMMU1_MMU_BASE; 11041 break; 11042 case HW_CAP_DCORE3_DMMU2: 11043 *mmu_base = mmDCORE3_HMMU2_MMU_BASE; 11044 break; 11045 case HW_CAP_DCORE3_DMMU3: 11046 *mmu_base = mmDCORE3_HMMU3_MMU_BASE; 11047 break; 11048 case HW_CAP_PMMU: 11049 *mmu_base = mmPMMU_HBW_MMU_BASE; 11050 break; 11051 default: 11052 return -EINVAL; 11053 } 11054 11055 return 0; 11056 } 11057 11058 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id) 11059 { 11060 bool is_pmmu = (mmu_id == HW_CAP_PMMU); 11061 struct gaudi2_device *gaudi2 = hdev->asic_specific; 11062 u32 mmu_base; 11063 11064 if (!(gaudi2->hw_cap_initialized & mmu_id)) 11065 return; 11066 11067 if 

static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
{
	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base;

	if (!(gaudi2->hw_cap_initialized & mmu_id))
		return;

	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
		return;

	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
}

static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;

	/* check all HMMUs */
	for (i = 0 ; i < num_of_hmmus ; i++) {
		mmu_id = HW_CAP_DCORE0_DMMU0 << i;

		if (mmu_cap_mask & mmu_id)
			gaudi2_ack_mmu_error(hdev, mmu_id);
	}

	/* check PMMU */
	if (mmu_cap_mask & HW_CAP_PMMU)
		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);

	return 0;
}

static void gaudi2_get_msi_info(__le32 *table)
{
	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
}

static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
	case HL_GAUDI2_MME_PLL: return MME_PLL;
	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
	case HL_GAUDI2_IF_PLL: return IF_PLL;
	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
	case HL_GAUDI2_VID_PLL: return VID_PLL;
	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
	default: return -EINVAL;
	}
}

static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev, struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
				u64 status_base_offset, enum hl_sync_engine_type engine_type,
				u32 engine_id, char **buf, size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}

static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
	.monitor_valid = gaudi2_monitor_valid,
	.print_single_monitor = gaudi2_print_single_monitor,
	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi2_print_fences_single_engine,
};

static void gaudi2_state_dump_init(struct hl_device *hdev)
{
	/* Not implemented */
	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
}

static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return 0;
}

static u32 *gaudi2_get_stream_master_qid_arr(void)
{
	return NULL;
}

static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
}

static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
				u32 page_size, u32 *real_page_size, bool is_dram_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* for host pages the page size must be a multiple of the MMU page size */
	if (!is_dram_addr) {
		if (page_size % mmu_prop->page_size)
			goto page_size_err;

		*real_page_size = mmu_prop->page_size;
		return 0;
	}

	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
		goto page_size_err;

	/*
	 * The MMU page size differs from the DRAM page size (more precisely, the DMMU page is
	 * larger than the DRAM page). For this reason, work with the DRAM page size and let the
	 * MMU scrambling routine handle the mismatch when calculating the address to place in
	 * the MMU page table (the check above also makes sure that dram_page_size is not greater
	 * than the MMU page size).
	 */
	*real_page_size = prop->dram_page_size;

	return 0;

page_size_err:
	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
			page_size, mmu_prop->page_size >> 10);
	return -EFAULT;
}
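
/*
 * Illustrative behaviour of gaudi2_mmu_get_real_page_size() above (a minimal
 * sketch; the 4KB host MMU page size is an assumed value for the example, not
 * taken from this file):
 *
 *	host mapping, page_size = 16KB, mmu_prop->page_size = 4KB
 *		-> 16KB is a multiple of 4KB, *real_page_size = 4KB, return 0
 *	host mapping, page_size = 6KB, mmu_prop->page_size = 4KB
 *		-> not a multiple, the error is logged and -EFAULT is returned
 *	DRAM mapping, page_size a multiple of prop->dram_page_size
 *		-> *real_page_size = prop->dram_page_size, and the HBM address
 *		   scrambler above compensates for the larger DMMU page
 */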

static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}

int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_device_activity(hdev, open);
}

static const struct hl_asic_funcs gaudi2_funcs = {
	.early_init = gaudi2_early_init,
	.early_fini = gaudi2_early_fini,
	.late_init = gaudi2_late_init,
	.late_fini = gaudi2_late_fini,
	.sw_init = gaudi2_sw_init,
	.sw_fini = gaudi2_sw_fini,
	.hw_init = gaudi2_hw_init,
	.hw_fini = gaudi2_hw_fini,
	.halt_engines = gaudi2_halt_engines,
	.suspend = gaudi2_suspend,
	.resume = gaudi2_resume,
	.mmap = gaudi2_mmap,
	.ring_doorbell = gaudi2_ring_doorbell,
	.pqe_write = gaudi2_pqe_write,
	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
	.scrub_device_mem = gaudi2_scrub_device_mem,
	.scrub_device_dram = gaudi2_scrub_device_dram,
	.get_int_queue_base = NULL,
	.test_queues = gaudi2_test_queues,
	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi2_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
	.asic_dma_map_single = gaudi2_dma_map_single,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi2_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = NULL,
	.update_eq_ci = gaudi2_update_eq_ci,
	.context_switch = gaudi2_context_switch,
	.restore_phase_topology = gaudi2_restore_phase_topology,
	.debugfs_read_dma = gaudi2_debugfs_read_dma,
	.add_device_attr = gaudi2_add_device_attr,
	.handle_eqe = gaudi2_handle_eqe,
	.get_events_stat = gaudi2_get_events_stat,
	.read_pte = NULL,
	.write_pte = NULL,
	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi2_send_heartbeat,
	.debug_coresight = gaudi2_debug_coresight,
	.is_device_idle = gaudi2_is_device_idle,
	.compute_reset_late_init = gaudi2_compute_reset_late_init,
	.hw_queues_lock = gaudi2_hw_queues_lock,
	.hw_queues_unlock = gaudi2_hw_queues_unlock,
	.get_pci_id = gaudi2_get_pci_id,
	.get_eeprom_data = gaudi2_get_eeprom_data,
	.get_monitor_dump = gaudi2_get_monitor_dump,
	.send_cpu_message = gaudi2_send_cpu_message,
	.pci_bars_map = gaudi2_pci_bars_map,
	.init_iatu = gaudi2_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi2_halt_coresight,
	.ctx_init = gaudi2_ctx_init,
	.ctx_fini = gaudi2_ctx_fini,
	.pre_schedule_cs = gaudi2_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
	.load_firmware_to_device = NULL,
	.load_boot_fit_to_device = NULL,
	.get_signal_cb_size = gaudi2_get_signal_cb_size,
	.get_wait_cb_size = gaudi2_get_wait_cb_size,
	.gen_signal_cb = gaudi2_gen_signal_cb,
	.gen_wait_cb = gaudi2_gen_wait_cb,
	.reset_sob = gaudi2_reset_sob,
	.reset_sob_group = gaudi2_reset_sob_group,
	.get_device_time = gaudi2_get_device_time,
	.pb_print_security_errors = gaudi2_pb_print_security_errors,
	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
	.get_dec_base_addr = gaudi2_get_dec_base_addr,
	.scramble_addr = gaudi2_mmu_scramble_addr,
	.descramble_addr = gaudi2_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
	.get_hw_block_id = gaudi2_get_hw_block_id,
	.hw_block_mmap = gaudi2_block_mmap,
	.enable_events_from_fw = gaudi2_enable_events_from_fw,
	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
	.get_msi_info = gaudi2_get_msi_info,
	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
	.init_firmware_loader = gaudi2_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = &gaudi2_get_sob_addr,
	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
	.set_engine_cores = gaudi2_set_engine_cores,
	.set_engines = gaudi2_set_engines,
	.send_device_activity = gaudi2_send_device_activity,
	.set_dram_properties = gaudi2_set_dram_properties,
	.set_binning_masks = gaudi2_set_binning_masks,
};

void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi2_funcs;
}
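
/*
 * Illustrative only (a minimal sketch, not driver code): once
 * gaudi2_set_asic_funcs() has run during device initialization, common
 * habanalabs code reaches the Gaudi2 implementations through this ops table
 * rather than by name, e.g.:
 *
 *	u64 scrambled = hdev->asic_funcs->scramble_addr(hdev, dram_addr);
 *	int rc = hdev->asic_funcs->send_device_activity(hdev, true);
 */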