// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudi2P.h"
#include "gaudi2_masks.h"
#include "../include/gaudi2/gaudi2_special_blocks.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v2_0.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_reg_map.h"
#include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
#include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>

#define GAUDI2_DMA_POOL_BLK_SIZE	SZ_256		/* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC	2000		/* 2000ms */

#define GAUDI2_RESET_POLL_TIMEOUT_USEC		500000		/* 500ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
#define GAUDI2_RESET_POLL_CNT			3
#define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
#define GAUDI2_CB_POOL_CB_CNT			512
#define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3

/*
 * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
 * and relies on that value (for array sizes etc.), we define another value for the
 * maximum number of faulty TPCs which reflects the cluster binning requirements.
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
#define MAX_FAULTY_XBARS			1
#define MAX_FAULTY_EDMAS			1
#define MAX_FAULTY_DECODERS			1

#define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
#define GAUDI2_DECODER_FULL_MASK		0x3FF

#define GAUDI2_NA_EVENT_CAUSE			0xFF
#define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE		30
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5

#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC			USEC_PER_SEC

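/*
 * Engine idle/halt checks on raw status register values. Note that a QMAN is
 * considered idle only when its GLBL_STS0, GLBL_STS1 (ARC) and CGM_STS values
 * all match their respective idle masks.
 */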
#define IS_DMA_IDLE(dma_core_sts0)	\
	(!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))

#define IS_DMA_HALTED(dma_core_sts1)	\
	((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))

#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts)	\
	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK		0x300
#define DEC_WORK_STATE_IDLE		0
#define DEC_WORK_STATE_PEND		3
#define IS_DEC_IDLE(dec_swreg15)	\
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)

/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
#define MMU_RANGE_INV_VA_LSB_SHIFT		12
#define MMU_RANGE_INV_VA_MSB_SHIFT		44
#define MMU_RANGE_INV_EN_SHIFT			0
#define MMU_RANGE_INV_ASID_EN_SHIFT		1
#define MMU_RANGE_INV_ASID_SHIFT		2

/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU
 * because it has only a 2-entry FIFO, and hence it is not enabled for it.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)

#define GAUDI2_MAX_STRING_LEN			64

#define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
						GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)

/* RAZWI initiator coordinates */
#define RAZWI_GET_AXUSER_XY(x) \
	((x & 0xF8001FF0) >> 4)

#define RAZWI_GET_AXUSER_LOW_XY(x) \
	((x & 0x00001FF0) >> 4)

#define RAZWI_INITIATOR_AXUER_L_X_SHIFT		0
#define RAZWI_INITIATOR_AXUER_L_X_MASK		0x1F
#define RAZWI_INITIATOR_AXUER_L_Y_SHIFT		5
#define RAZWI_INITIATOR_AXUER_L_Y_MASK		0xF

#define RAZWI_INITIATOR_AXUER_H_X_SHIFT		23
#define RAZWI_INITIATOR_AXUER_H_X_MASK		0x1F

#define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
	((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
	(((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))

#define RAZWI_INITIATOR_ID_X_HIGH(x) \
	(((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)

#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
	(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))

#define PSOC_RAZWI_ENG_STR_SIZE 128
#define PSOC_RAZWI_MAX_ENG_PER_RTR 5

struct gaudi2_razwi_info {
	u32 axuser_xy;
	u32 rtr_ctrl;
	u16 eng_id;
	char *eng_name;
};

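/*
 * Lookup table from a RAZWI initiator's AXUSER X/Y coordinates and the
 * reporting router (RTR) control block base to the initiator's engine ID
 * and printable name.
 */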
static struct gaudi2_razwi_info common_razwi_info[] = {
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"},
	{RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
	{RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
	{RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
	{RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
	{RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
	{RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
	{RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
	{RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
	{RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
	{RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
	{RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
	{RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
	{RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
	{RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
	{RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
	{RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
	{RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
	{RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
	{RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
	{RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
	{RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
	{RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
	{RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
	{RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "PMMU"},
	{RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "PCIE"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
	{RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_KDMA, "KDMA"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
	{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
	{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
	{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
	{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
	{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
		GAUDI2_ENGINE_ID_PSOC, "CPU"},
	{RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
		GAUDI2_ENGINE_ID_PSOC, "PSOC"}
};

static struct gaudi2_razwi_info mme_razwi_info[] = {
	/* MME X high coordinate is N/A, hence using only low coordinates */
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
		GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
		GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
		GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
	{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
		GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
};

enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN,
	LATENCY_WR_OUT_FIFO_OVERRUN,
};

enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND,
	HBW_AXI_DRAIN_IND
};

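/*
 * Per-HBM-cluster HIF/HMMU enable masks, and the mapping of XBAR edges and
 * EDMA instances to the HBM cluster they belong to.
 */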
static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
	[HBM_ID0] = 0xFFFC,
	[HBM_ID1] = 0xFFCF,
	[HBM_ID2] = 0xF7F7,
	[HBM_ID3] = 0x7F7F,
	[HBM_ID4] = 0xFCFF,
	[HBM_ID5] = 0xCFFF,
};

static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
	[0] = HBM_ID0,
	[1] = HBM_ID1,
	[2] = HBM_ID4,
	[3] = HBM_ID5,
};

static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};

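/*
 * Maps each hardware queue ID (four PQs per QMAN) to the async event ID
 * reported by that queue's QMAN.
 */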
static const int gaudi2_qman_async_event_id[] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
};

static const int gaudi2_dma_core_async_event_id[] = {
	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};

static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
	"qman sei intr",
	"arc sei intr"
};

static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
	"AXI_TERMINATOR WR",
	"AXI_TERMINATOR RD",
	"AXI SPLIT SEI Status"
};

static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
	"cbu_bresp_sei_intr_cause",
	"cbu_rresp_sei_intr_cause",
	"lbu_bresp_sei_intr_cause",
	"lbu_rresp_sei_intr_cause",
	"cbu_axi_split_intr_cause",
	"lbu_axi_split_intr_cause",
	"arc_ip_excptn_sei_intr_cause",
	"dmi_bresp_sei_intr_cause",
	"aux2apb_err_sei_intr_cause",
	"cfg_lbw_wr_terminated_intr_cause",
	"cfg_lbw_rd_terminated_intr_cause",
	"cfg_dccm_wr_terminated_intr_cause",
	"cfg_dccm_rd_terminated_intr_cause",
	"cfg_hbw_rd_terminated_intr_cause"
};

static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
	"msix_vcd_hbw_sei",
	"msix_l2c_hbw_sei",
	"msix_nrm_hbw_sei",
	"msix_abnrm_hbw_sei",
	"msix_vcd_lbw_sei",
	"msix_l2c_lbw_sei",
	"msix_nrm_lbw_sei",
	"msix_abnrm_lbw_sei",
	"apb_vcd_lbw_sei",
	"apb_l2c_lbw_sei",
	"apb_nrm_lbw_sei",
	"apb_abnrm_lbw_sei",
	"dec_sei",
	"dec_apb_sei",
	"trc_apb_sei",
	"lbw_mstr_if_sei",
	"axi_split_bresp_err_sei",
	"hbw_axi_wr_viol_sei",
	"hbw_axi_rd_viol_sei",
	"lbw_axi_wr_viol_sei",
	"lbw_axi_rd_viol_sei",
	"vcd_spi",
	"l2c_spi",
	"nrm_spi",
	"abnrm_spi",
};

static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"PQC L2H error"
};

static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
	"RSVD0",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"RSVD17",
	"CQ_WR_IFIFO_CI_ERR",
	"CQ_WR_CTL_CI_ERR",
	"ARC_CQF_RD_ERR",
	"ARC_CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_CTL_CI_ERR",
	"ARC_AXI_ERR",
	"CP_SWITCH_WDT_ERR"
};

static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
	"qm_axi_err",
	"qm_trace_fence_events",
	"qm_sw_err",
	"qm_cp_sw_stop",
	"lbw_mstr_rresp_err",
	"lbw_mstr_bresp_err",
	"lbw_msg_slverr",
	"hbw_msg_slverr",
	"wbc_slverr",
	"hbw_mstr_rresp_err",
	"hbw_mstr_bresp_err",
	"sb_resp_intr",
	"mrsb_resp_intr",
	"core_dw_status_0",
	"core_dw_status_1",
	"core_dw_status_2",
	"core_dw_status_3",
	"core_dw_status_4",
	"core_dw_status_5",
	"core_dw_status_6",
	"core_dw_status_7",
	"async_arc2cpu_sei_intr",
};

static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fma_fp_dst_nan",
	"tpc_spu_fma_fp_dst_inf",
	"tpc_spu_convert_fp_dst_nan",
	"tpc_spu_convert_fp_dst_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_convert_fp_dst_nan",
	"tpc_vpu_convert_fp_dst_inf",
	"tpc_vpu_fma_fp_dst_nan",
	"tpc_vpu_fma_fp_dst_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err",
	"st_unlock_already_locked",
	"invalid_lock_access",
	"LD_L protection violation",
	"ST_L protection violation",
};

static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
	"agu_resp_intr",
	"qman_axi_err",
	"wap sei (wbc axi err)",
	"arc sei",
	"cfg access error",
	"qm_sw_err",
	"sbte_dbg_intr_0",
	"sbte_dbg_intr_1",
	"sbte_dbg_intr_2",
	"sbte_dbg_intr_3",
	"sbte_dbg_intr_4",
	"sbte_prtn_intr_0",
	"sbte_prtn_intr_1",
	"sbte_prtn_intr_2",
	"sbte_prtn_intr_3",
	"sbte_prtn_intr_4",
};

static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
	"i0",
	"i1",
	"i2",
	"i3",
	"i4",
};

static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
	"WBC ERR RESP_0",
	"WBC ERR RESP_1",
	"AP SOURCE POS INF",
	"AP SOURCE NEG INF",
	"AP SOURCE NAN",
	"AP RESULT POS INF",
	"AP RESULT NEG INF",
};

static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW Read returned with error RRESP",
	"HBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW/LBW Read returned with error RRESP",
	"HBW/LBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};

struct gaudi2_sm_sei_cause_data {
	const char *cause_name;
	const char *log_name;
};

static const struct gaudi2_sm_sei_cause_data
gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
	{"calculated SO value overflow/underflow", "SOB ID"},
	{"payload address of monitor is not aligned to 4B", "monitor addr"},
	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
};

static const char * const
gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};

static const char * const
gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
	"AXI drain HBW",
	"AXI drain LBW",
};

static const char * const
gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
	"HBW error response",
	"LBW error response",
	"TLP is blocked by RR"
};

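/* MMIO base address of the QMAN block that serves each hardware queue. */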
const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
};

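/* AUX register block base address for each ARC CPU in the device. */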
mmDCORE3_MME_QM_ARC_AUX_BASE, 1262 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE, 1263 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE, 1264 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE, 1265 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE, 1266 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE, 1267 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE, 1268 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE, 1269 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE, 1270 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE, 1271 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE, 1272 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE, 1273 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE, 1274 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE, 1275 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE, 1276 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE, 1277 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE, 1278 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE, 1279 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE, 1280 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE, 1281 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE, 1282 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE, 1283 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE, 1284 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE, 1285 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE, 1286 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE, 1287 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE, 1288 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE, 1289 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE, 1290 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE, 1291 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE, 1292 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE, 1293 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE, 1294 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE, 1295 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE, 1296 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE, 1297 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE, 1298 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE, 1299 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE, 1300 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE, 1301 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE, 1302 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE, 1303 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE, 1304 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE, 1305 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE, 1306 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE, 1307 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE, 1308 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE, 1309 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE, 1310 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE, 1311 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE, 1312 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE, 1313 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE, 1314 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE, 1315 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE, 1316 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE, 1317 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE, 1318 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE, 1319 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE, 1320 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE, 1321 [CPU_ID_NIC_QMAN_ARC20] = 
mmNIC10_QM_ARC_AUX0_BASE, 1322 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE, 1323 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE, 1324 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE, 1325 }; 1326 1327 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = { 1328 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE, 1329 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE, 1330 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE, 1331 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE, 1332 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE, 1333 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE, 1334 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE, 1335 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE, 1336 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE, 1337 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE, 1338 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE, 1339 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE, 1340 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE, 1341 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE, 1342 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE, 1343 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE, 1344 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE, 1345 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE, 1346 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE, 1347 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE, 1348 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE, 1349 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE, 1350 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE, 1351 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE, 1352 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE, 1353 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE, 1354 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE, 1355 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE, 1356 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE, 1357 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE, 1358 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE, 1359 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE, 1360 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE, 1361 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE, 1362 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE, 1363 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE, 1364 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE, 1365 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE, 1366 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE, 1367 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE, 1368 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE, 1369 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE, 1370 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE, 1371 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE, 1372 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE, 1373 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE, 1374 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE, 1375 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE, 1376 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE, 1377 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE, 1378 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE, 1379 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE, 1380 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE, 1381 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE, 1382 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE, 1383 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE, 1384 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE, 1385 [CPU_ID_NIC_QMAN_ARC12] 
= mmNIC6_QM_DCCM0_BASE, 1386 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE, 1387 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE, 1388 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE, 1389 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE, 1390 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE, 1391 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE, 1392 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE, 1393 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE, 1394 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE, 1395 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE, 1396 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE, 1397 }; 1398 1399 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = { 1400 [MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE, 1401 [MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE, 1402 [MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE, 1403 [MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE, 1404 }; 1405 1406 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = { 1407 [GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0, 1408 [GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0, 1409 [GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0, 1410 [GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0, 1411 [GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1, 1412 [GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1, 1413 [GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1, 1414 [GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1, 1415 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0, 1416 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0, 1417 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0, 1418 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0, 1419 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1, 1420 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1, 1421 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1, 1422 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1, 1423 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0, 1424 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0, 1425 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0, 1426 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0, 1427 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0, 1428 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0, 1429 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0, 1430 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0, 1431 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1, 1432 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1, 1433 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1, 1434 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1, 1435 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2, 1436 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2, 1437 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2, 1438 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2, 1439 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3, 1440 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3, 1441 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3, 1442 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3, 1443 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4, 1444 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4, 1445 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4, 1446 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4, 1447 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5, 1448 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5, 
1449 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5, 1450 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5, 1451 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24, 1452 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24, 1453 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24, 1454 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24, 1455 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2, 1456 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2, 1457 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2, 1458 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2, 1459 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3, 1460 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3, 1461 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3, 1462 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3, 1463 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4, 1464 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4, 1465 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4, 1466 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4, 1467 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6, 1468 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6, 1469 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6, 1470 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6, 1471 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7, 1472 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7, 1473 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7, 1474 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7, 1475 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8, 1476 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8, 1477 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8, 1478 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8, 1479 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9, 1480 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9, 1481 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9, 1482 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9, 1483 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10, 1484 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10, 1485 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10, 1486 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10, 1487 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11, 1488 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11, 1489 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11, 1490 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11, 1491 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4, 1492 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4, 1493 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4, 1494 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4, 1495 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5, 1496 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5, 1497 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5, 1498 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5, 1499 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1, 1500 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1, 1501 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1, 1502 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1, 1503 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12, 1504 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12, 1505 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = 
CPU_ID_TPC_QMAN_ARC12, 1506 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12, 1507 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13, 1508 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13, 1509 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13, 1510 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13, 1511 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14, 1512 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14, 1513 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14, 1514 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14, 1515 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15, 1516 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15, 1517 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15, 1518 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15, 1519 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16, 1520 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16, 1521 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16, 1522 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16, 1523 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17, 1524 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17, 1525 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17, 1526 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17, 1527 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6, 1528 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6, 1529 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6, 1530 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6, 1531 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7, 1532 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7, 1533 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7, 1534 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7, 1535 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5, 1536 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5, 1537 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5, 1538 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5, 1539 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18, 1540 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18, 1541 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18, 1542 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18, 1543 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19, 1544 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19, 1545 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19, 1546 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19, 1547 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20, 1548 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20, 1549 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20, 1550 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20, 1551 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21, 1552 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21, 1553 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21, 1554 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21, 1555 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22, 1556 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22, 1557 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22, 1558 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22, 1559 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23, 1560 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23, 1561 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23, 1562 
[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23, 1563 [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0, 1564 [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0, 1565 [GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0, 1566 [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0, 1567 [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1, 1568 [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1, 1569 [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1, 1570 [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1, 1571 [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2, 1572 [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2, 1573 [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2, 1574 [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2, 1575 [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3, 1576 [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3, 1577 [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3, 1578 [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3, 1579 [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4, 1580 [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4, 1581 [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4, 1582 [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4, 1583 [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5, 1584 [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5, 1585 [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5, 1586 [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5, 1587 [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6, 1588 [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6, 1589 [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6, 1590 [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6, 1591 [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7, 1592 [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7, 1593 [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7, 1594 [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7, 1595 [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8, 1596 [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8, 1597 [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8, 1598 [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8, 1599 [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9, 1600 [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9, 1601 [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9, 1602 [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9, 1603 [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10, 1604 [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10, 1605 [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10, 1606 [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10, 1607 [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11, 1608 [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11, 1609 [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11, 1610 [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11, 1611 [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12, 1612 [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12, 1613 [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12, 1614 [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12, 1615 [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13, 1616 [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13, 1617 [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13, 1618 [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13, 1619 [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14, 1620 [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14, 1621 [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14, 1622 [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14, 1623 [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15, 1624 [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15, 1625 [GAUDI2_QUEUE_ID_NIC_15_2] = 
CPU_ID_NIC_QMAN_ARC15, 1626 [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15, 1627 [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16, 1628 [GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16, 1629 [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16, 1630 [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16, 1631 [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17, 1632 [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17, 1633 [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17, 1634 [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17, 1635 [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18, 1636 [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18, 1637 [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18, 1638 [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18, 1639 [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19, 1640 [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19, 1641 [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19, 1642 [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19, 1643 [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20, 1644 [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20, 1645 [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20, 1646 [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20, 1647 [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21, 1648 [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21, 1649 [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21, 1650 [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21, 1651 [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22, 1652 [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22, 1653 [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22, 1654 [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22, 1655 [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23, 1656 [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23, 1657 [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23, 1658 [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23, 1659 [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0, 1660 [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0, 1661 [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0, 1662 [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0, 1663 [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1, 1664 [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1, 1665 [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1, 1666 [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1 1667 }; 1668 1669 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = { 1670 [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE, 1671 [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE, 1672 [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE, 1673 [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE, 1674 [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE, 1675 [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE, 1676 [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE, 1677 [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE, 1678 [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE, 1679 [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE, 1680 [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE 1681 }; 1682 1683 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = { 1684 [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE, 1685 [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE, 1686 [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE, 1687 [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE 1688 }; 1689 1690 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = { 1691 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE, 1692 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE, 1693 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE, 1694 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE, 1695 [TPC_ID_DCORE0_TPC4] 
= mmDCORE0_TPC4_CFG_BASE, 1696 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE, 1697 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE, 1698 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE, 1699 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE, 1700 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE, 1701 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE, 1702 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE, 1703 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE, 1704 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE, 1705 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE, 1706 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE, 1707 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE, 1708 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE, 1709 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE, 1710 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE, 1711 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE, 1712 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE, 1713 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE, 1714 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE, 1715 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE, 1716 }; 1717 1718 static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = { 1719 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE, 1720 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE, 1721 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE, 1722 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE, 1723 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE, 1724 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE, 1725 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE, 1726 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE, 1727 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE, 1728 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE, 1729 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE, 1730 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE, 1731 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE, 1732 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE, 1733 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE, 1734 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE, 1735 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE, 1736 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE, 1737 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE, 1738 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE, 1739 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE, 1740 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE, 1741 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE, 1742 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE, 1743 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE, 1744 }; 1745 1746 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = { 1747 [ROTATOR_ID_0] = mmROT0_BASE, 1748 [ROTATOR_ID_1] = mmROT1_BASE 1749 }; 1750 1751 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = { 1752 [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0, 1753 [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0, 1754 [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0, 1755 [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0, 1756 [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0, 1757 [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0, 1758 [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0, 1759 [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0, 1760 [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0, 1761 [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0, 1762 [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0, 1763 [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0, 1764 [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0, 1765 
[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0, 1766 [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0, 1767 [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0, 1768 [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0, 1769 [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0, 1770 [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0, 1771 [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0, 1772 [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0, 1773 [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0, 1774 [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0, 1775 [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0, 1776 [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0, 1777 }; 1778 1779 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = { 1780 [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0, 1781 [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0, 1782 }; 1783 1784 static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = { 1785 [GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0, 1786 [GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1, 1787 [GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2, 1788 [GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3, 1789 [GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4, 1790 [GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5, 1791 [GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0, 1792 [GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1, 1793 [GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2, 1794 [GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3, 1795 [GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4, 1796 [GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5, 1797 [GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0, 1798 [GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1, 1799 [GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2, 1800 [GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3, 1801 [GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4, 1802 [GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5, 1803 [GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0, 1804 [GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1, 1805 [GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2, 1806 [GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3, 1807 [GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4, 1808 [GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5, 1809 /* the PCI TPC is placed last (mapped liked HW) */ 1810 [GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6, 1811 }; 1812 1813 static const u32 gaudi2_mme_engine_id_to_mme_id[] = { 1814 [GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0, 1815 [GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1, 1816 [GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2, 1817 [GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3, 1818 }; 1819 1820 static const u32 gaudi2_edma_engine_id_to_edma_id[] = { 1821 [GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0, 1822 [GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1, 1823 [GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0, 1824 [GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1, 1825 [GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2, 1826 [GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3, 1827 [GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4, 1828 [GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5, 1829 [GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6, 1830 [GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7, 1831 [GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA, 1832 }; 1833 1834 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * 
NUM_OF_DCORES] = { 1835 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0, 1836 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0, 1837 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0, 1838 GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0, 1839 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0, 1840 GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0, 1841 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0, 1842 GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0, 1843 }; 1844 1845 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = { 1846 "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal", 1847 "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal", 1848 "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal", 1849 "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal", 1850 "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal", 1851 "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal", 1852 "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal", 1853 "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal", 1854 "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal", 1855 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal" 1856 }; 1857 1858 enum rtr_id { 1859 DCORE0_RTR0, 1860 DCORE0_RTR1, 1861 DCORE0_RTR2, 1862 DCORE0_RTR3, 1863 DCORE0_RTR4, 1864 DCORE0_RTR5, 1865 DCORE0_RTR6, 1866 DCORE0_RTR7, 1867 DCORE1_RTR0, 1868 DCORE1_RTR1, 1869 DCORE1_RTR2, 1870 DCORE1_RTR3, 1871 DCORE1_RTR4, 1872 DCORE1_RTR5, 1873 DCORE1_RTR6, 1874 DCORE1_RTR7, 1875 DCORE2_RTR0, 1876 DCORE2_RTR1, 1877 DCORE2_RTR2, 1878 DCORE2_RTR3, 1879 DCORE2_RTR4, 1880 DCORE2_RTR5, 1881 DCORE2_RTR6, 1882 DCORE2_RTR7, 1883 DCORE3_RTR0, 1884 DCORE3_RTR1, 1885 DCORE3_RTR2, 1886 DCORE3_RTR3, 1887 DCORE3_RTR4, 1888 DCORE3_RTR5, 1889 DCORE3_RTR6, 1890 DCORE3_RTR7, 1891 }; 1892 1893 static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { 1894 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3, 1895 DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4, 1896 DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, 1897 DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, 1898 DCORE0_RTR0 1899 }; 1900 1901 static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { 1902 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, 1903 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, 1904 DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0, 1905 DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7, 1906 DCORE0_RTR0 1907 }; 1908 1909 static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = { 1910 DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0, 1911 DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0 1912 }; 1913 1914 static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = { 1915 DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1, 1916 DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0 1917 }; 1918 1919 static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = { 1920 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, 1921 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7 1922 }; 1923 1924 static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = { 1925 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, 1926 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7 1927 }; 1928 1929 static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] 
= { 1930 mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE, 1931 mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1932 mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE, 1933 mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1934 mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1935 mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE, 1936 mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE, 1937 mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE 1938 }; 1939 1940 static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = { 1941 DCORE0_RTR0, DCORE0_RTR0 1942 }; 1943 1944 static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = { 1945 DCORE0_RTR2, DCORE0_RTR2 1946 }; 1947 1948 static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = { 1949 DCORE2_RTR0, DCORE3_RTR7 1950 }; 1951 1952 static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = { 1953 DCORE2_RTR2, DCORE3_RTR5 1954 }; 1955 1956 struct mme_initiators_rtr_id { 1957 u32 wap0; 1958 u32 wap1; 1959 u32 write; 1960 u32 read; 1961 u32 sbte0; 1962 u32 sbte1; 1963 u32 sbte2; 1964 u32 sbte3; 1965 u32 sbte4; 1966 }; 1967 1968 enum mme_initiators { 1969 MME_WAP0 = 0, 1970 MME_WAP1, 1971 MME_WRITE, 1972 MME_READ, 1973 MME_SBTE0, 1974 MME_SBTE1, 1975 MME_SBTE2, 1976 MME_SBTE3, 1977 MME_SBTE4, 1978 MME_INITIATORS_MAX 1979 }; 1980 1981 static const struct mme_initiators_rtr_id 1982 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = { 1983 { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7, 1984 .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6}, 1985 { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8, 1986 .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8}, 1987 { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23, 1988 .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23}, 1989 { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30, 1990 .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28}, 1991 }; 1992 1993 enum razwi_event_sources { 1994 RAZWI_TPC, 1995 RAZWI_MME, 1996 RAZWI_EDMA, 1997 RAZWI_PDMA, 1998 RAZWI_NIC, 1999 RAZWI_DEC, 2000 RAZWI_ROT 2001 }; 2002 2003 struct hbm_mc_error_causes { 2004 u32 mask; 2005 char cause[50]; 2006 }; 2007 2008 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS; 2009 2010 /* Special blocks iterator is currently used to configure security protection bits, 2011 * and read global errors. Most HW blocks are addressable and those who aren't (N/A)- 2012 * must be skipped. Following configurations are commonly used for both PB config 2013 * and global error reading, since currently they both share the same settings. 2014 * Once it changes, we must remember to use separate configurations for either one. 
2015 */ 2016 static int gaudi2_iterator_skip_block_types[] = { 2017 GAUDI2_BLOCK_TYPE_PLL, 2018 GAUDI2_BLOCK_TYPE_EU_BIST, 2019 GAUDI2_BLOCK_TYPE_HBM, 2020 GAUDI2_BLOCK_TYPE_XFT 2021 }; 2022 2023 static struct range gaudi2_iterator_skip_block_ranges[] = { 2024 /* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */ 2025 {mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE}, 2026 {mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE}, 2027 /* Skip all CPU blocks except for CPU_IF */ 2028 {mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE}, 2029 {mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE} 2030 }; 2031 2032 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = { 2033 {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"}, 2034 {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"}, 2035 {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"}, 2036 {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"}, 2037 {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"}, 2038 }; 2039 2040 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = { 2041 [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even", 2042 [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd", 2043 [HBM_SEI_READ_ERR] = "SEI read data error", 2044 [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error", 2045 [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted", 2046 [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail", 2047 [HBM_SEI_DFI] = "SEI DFI error", 2048 [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read", 2049 [HBM_SEI_BIST_FAIL] = "SEI BIST fail" 2050 }; 2051 2052 struct mmu_spi_sei_cause { 2053 char cause[50]; 2054 int clear_bit; 2055 }; 2056 2057 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = { 2058 {"page fault", 1}, /* INTERRUPT_CLR[1] */ 2059 {"page access", 1}, /* INTERRUPT_CLR[1] */ 2060 {"bypass ddr", 2}, /* INTERRUPT_CLR[2] */ 2061 {"multi hit", 2}, /* INTERRUPT_CLR[2] */ 2062 {"mmu rei0", -1}, /* no clear register bit */ 2063 {"mmu rei1", -1}, /* no clear register bit */ 2064 {"stlb rei0", -1}, /* no clear register bit */ 2065 {"stlb rei1", -1}, /* no clear register bit */ 2066 {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */ 2067 {"rr privileged read hit", 2}, /* INTERRUPT_CLR[2] */ 2068 {"rr secure write hit", 2}, /* INTERRUPT_CLR[2] */ 2069 {"rr secure read hit", 2}, /* INTERRUPT_CLR[2] */ 2070 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 2071 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 2072 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 2073 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */ 2074 {"slave error", 16}, /* INTERRUPT_CLR[16] */ 2075 {"dec error", 17}, /* INTERRUPT_CLR[17] */ 2076 {"burst fifo full", 2} /* INTERRUPT_CLR[2] */ 2077 }; 2078 2079 struct gaudi2_cache_invld_params { 2080 u64 start_va; 2081 u64 end_va; 2082 u32 inv_start_val; 2083 u32 flags; 2084 bool range_invalidation; 2085 }; 2086 2087 struct gaudi2_tpc_idle_data { 2088 struct engines_data *e; 2089 unsigned long *mask; 2090 bool *is_idle; 2091 const char *tpc_fmt; 2092 }; 2093 2094 struct gaudi2_tpc_mmu_data { 2095 u32 rw_asid; 2096 }; 2097 2098 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0}; 2099 2100 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val); 2101 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id); 2102 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id); 2103 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id); 2104 
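/*
 * Editor's note: illustrative sketch only, not part of the driver. It shows
 * one way the gaudi2_mmu_spi_sei[] cause table defined above can be consumed
 * by an error handler: scan the asserted cause bits, log the matching cause
 * string, and accumulate the INTERRUPT_CLR bits to write back, skipping
 * entries whose clear_bit is -1 (no clear register bit). The handler name,
 * the way the cause value is obtained and how the clear mask is written back
 * are hypothetical here.
 *
 *	static void example_handle_mmu_spi_sei(struct hl_device *hdev, u32 cause)
 *	{
 *		u32 clr_mask = 0;
 *		int i;
 *
 *		for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
 *			if (!(cause & BIT(i)))
 *				continue;
 *
 *			dev_err_ratelimited(hdev->dev, "MMU SPI/SEI error: %s\n",
 *						gaudi2_mmu_spi_sei[i].cause);
 *
 *			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
 *				clr_mask |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
 *		}
 *
 *		// clr_mask would then be written to the block's INTERRUPT_CLR register
 *	}
 */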
static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id); 2105 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val); 2106 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size, 2107 bool is_memset); 2108 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 2109 struct engines_data *e); 2110 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 2111 struct engines_data *e); 2112 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 2113 struct engines_data *e); 2114 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr); 2115 2116 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev) 2117 { 2118 2119 } 2120 2121 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev) 2122 { 2123 return sizeof(struct packet_msg_short); 2124 } 2125 2126 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev) 2127 { 2128 return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence); 2129 } 2130 2131 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) 2132 { 2133 struct asic_fixed_properties *prop = &hdev->asic_prop; 2134 int dcore, inst, tpc_seq; 2135 u32 offset; 2136 2137 /* init the return code */ 2138 ctx->rc = 0; 2139 2140 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) { 2141 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) { 2142 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 2143 2144 if (!(prop->tpc_enabled_mask & BIT(tpc_seq))) 2145 continue; 2146 2147 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst); 2148 2149 ctx->fn(hdev, dcore, inst, offset, ctx); 2150 if (ctx->rc) { 2151 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n", 2152 dcore, inst); 2153 return; 2154 } 2155 } 2156 } 2157 2158 if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6))) 2159 return; 2160 2161 /* special check for PCI TPC (DCORE0_TPC6) */ 2162 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1); 2163 ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx); 2164 if (ctx->rc) 2165 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n"); 2166 } 2167 2168 static bool gaudi2_host_phys_addr_valid(u64 addr) 2169 { 2170 if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1)) 2171 return true; 2172 2173 return false; 2174 } 2175 2176 static int set_number_of_functional_hbms(struct hl_device *hdev) 2177 { 2178 struct asic_fixed_properties *prop = &hdev->asic_prop; 2179 u8 faulty_hbms = hweight64(hdev->dram_binning); 2180 2181 /* check if all HBMs should be used */ 2182 if (!faulty_hbms) { 2183 dev_dbg(hdev->dev, "All HBM are in use (no binning)\n"); 2184 prop->num_functional_hbms = GAUDI2_HBM_NUM; 2185 return 0; 2186 } 2187 2188 /* 2189 * check for error condition in which number of binning 2190 * candidates is higher than the maximum supported by the 2191 * driver (in which case binning mask shall be ignored and driver will 2192 * set the default) 2193 */ 2194 if (faulty_hbms > MAX_FAULTY_HBMS) { 2195 dev_err(hdev->dev, 2196 "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n", 2197 MAX_FAULTY_HBMS, hdev->dram_binning); 2198 return -EINVAL; 2199 } 2200 2201 /* 2202 * by default, number of functional HBMs in Gaudi2 is always 2203 * GAUDI2_HBM_NUM - 1. 
	 */
	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
	return 0;
}

static int gaudi2_set_dram_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 basic_hbm_page_size;
	int rc;

	rc = set_number_of_functional_hbms(hdev);
	if (rc)
		return -EINVAL;

	/*
	 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround
	 * in which we are using x16 bigger page size to be able to populate the entire
	 * HBM mappings in the TLB
	 */
	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_size = prop->num_functional_hbms * SZ_16G;
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_supports_virtual_memory = true;

	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;

	/* since DRAM page size differs from DMMU page size we need to allocate
	 * DRAM memory in units of dram_page size and mapping this memory in
	 * units of DMMU page size. we overcome this size mismatch using a
	 * scrambling routine which takes a DRAM page and converts it to a DMMU
	 * page.
	 * We therefore:
	 * 1. partition the virtual address space to DRAM-page (whole) pages.
	 *    (suppose we get n such pages)
	 * 2. limit the amount of virtual address space we got from 1 above to
	 *    a multiple of 64M as we don't want the scrambled address to cross
	 *    the DRAM virtual address space.
	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
	 * 3. determine the end address accordingly
	 *    end_addr = start_addr + m * 48M
	 *
	 * the DRAM address MSBs (63:48) are not part of the roundup calculation
	 */
	prop->dmmu.start_addr = prop->dram_base_address +
			(prop->dram_page_size *
			DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));

	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);

	return 0;
}

static int gaudi2_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hw_queue_properties *q_props;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
					GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	q_props = prop->hw_queues_props;

	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
		q_props[i].type = QUEUE_TYPE_HW;
		q_props[i].driver_only = 0;

		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
			q_props[i].supports_sync_stream = 0;
		} else {
			q_props[i].supports_sync_stream = 1;
			num_sync_stream_queues++;
		}

		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
	prop->host_base_address = HOST_PHYS_BASE_0;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
	prop->user_dec_intr_count = NUMBER_OF_DEC;
	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
	prop->completion_mode = HL_COMPLETION_MODE_CS;
	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->hints_range_reservation = true;

	prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;

	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;

	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
	prop->dmmu.page_size = PAGE_SIZE_1GB;
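	/*
	 * Editor's note - worked example (illustrative; assumes GAUDI2_HBM_NUM is 6
	 * and no HBM binning): gaudi2_set_dram_properties() above then computes
	 * basic_hbm_page_size = 6 * 8MB = 48MB, dram_page_size = 16 * 48MB = 768MB
	 * and dram_size = 6 * 16GB = 96GB. The DMMU page size set here stays
	 * PAGE_SIZE_1GB; the mismatch between the 768MB DRAM allocation unit and
	 * the DMMU page is the size gap that the address scrambling routine
	 * described in that function's comment is meant to bridge.
	 */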
prop->dmmu.num_hops = MMU_ARCH_6_HOPS; 2341 prop->dmmu.last_mask = LAST_MASK; 2342 prop->dmmu.host_resident = 1; 2343 prop->dmmu.hop_table_size = prop->mmu_hop_table_size; 2344 prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 2345 2346 /* 2347 * this is done in order to be able to validate FW descriptor (i.e. validating that 2348 * the addresses and allocated space for FW image does not cross memory bounds). 2349 * for this reason we set the DRAM size to the minimum possible and later it will 2350 * be modified according to what reported in the cpucp info packet 2351 */ 2352 prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G; 2353 2354 hdev->pmmu_huge_range = true; 2355 prop->pmmu.host_resident = 1; 2356 prop->pmmu.num_hops = MMU_ARCH_6_HOPS; 2357 prop->pmmu.last_mask = LAST_MASK; 2358 prop->pmmu.hop_table_size = prop->mmu_hop_table_size; 2359 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size; 2360 2361 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START; 2362 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END; 2363 prop->hints_host_hpage_reserved_va_range.start_addr = 2364 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START; 2365 prop->hints_host_hpage_reserved_va_range.end_addr = 2366 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END; 2367 2368 if (PAGE_SIZE == SZ_64K) { 2369 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K; 2370 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K; 2371 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K; 2372 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K; 2373 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K; 2374 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K; 2375 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K; 2376 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K; 2377 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K; 2378 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K; 2379 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K; 2380 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K; 2381 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2382 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2383 prop->pmmu.page_size = PAGE_SIZE_64KB; 2384 2385 /* shifts and masks are the same in PMMU and HPMMU */ 2386 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2387 prop->pmmu_huge.page_size = PAGE_SIZE_16MB; 2388 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2389 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; 2390 } else { 2391 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K; 2392 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K; 2393 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K; 2394 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K; 2395 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K; 2396 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K; 2397 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K; 2398 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K; 2399 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K; 2400 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K; 2401 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K; 2402 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K; 2403 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START; 2404 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END; 2405 prop->pmmu.page_size = PAGE_SIZE_4KB; 2406 2407 /* shifts and masks are the same in PMMU and HPMMU */ 2408 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 2409 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 2410 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START; 2411 prop->pmmu_huge.end_addr = 
VA_HOST_SPACE_HPAGE_END; 2412 } 2413 2414 prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE; 2415 prop->num_engine_cores = CPU_ID_MAX; 2416 prop->cfg_size = CFG_SIZE; 2417 prop->max_asid = MAX_ASID; 2418 prop->num_of_events = GAUDI2_EVENT_SIZE; 2419 2420 prop->supports_engine_modes = true; 2421 2422 prop->dc_power_default = DC_POWER_DEFAULT; 2423 2424 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT; 2425 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE; 2426 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE; 2427 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 2428 2429 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2430 2431 prop->mme_master_slave_mode = 1; 2432 2433 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER + 2434 (num_sync_stream_queues * HL_RSVD_SOBS); 2435 2436 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER + 2437 (num_sync_stream_queues * HL_RSVD_MONS); 2438 2439 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST; 2440 prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT; 2441 prop->unexpected_user_error_interrupt_id = GAUDI2_IRQ_NUM_UNEXPECTED_ERROR; 2442 2443 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER; 2444 2445 prop->fw_cpu_boot_dev_sts0_valid = false; 2446 prop->fw_cpu_boot_dev_sts1_valid = false; 2447 prop->hard_reset_done_by_fw = false; 2448 prop->gic_interrupts_enable = true; 2449 2450 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 2451 2452 prop->max_dec = NUMBER_OF_DEC; 2453 2454 prop->clk_pll_index = HL_GAUDI2_MME_PLL; 2455 2456 prop->dma_mask = 64; 2457 2458 prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0; 2459 2460 return 0; 2461 } 2462 2463 static int gaudi2_pci_bars_map(struct hl_device *hdev) 2464 { 2465 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"}; 2466 bool is_wc[3] = {false, false, true}; 2467 int rc; 2468 2469 rc = hl_pci_bars_map(hdev, name, is_wc); 2470 if (rc) 2471 return rc; 2472 2473 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR); 2474 2475 return 0; 2476 } 2477 2478 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 2479 { 2480 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2481 struct hl_inbound_pci_region pci_region; 2482 u64 old_addr = addr; 2483 int rc; 2484 2485 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr)) 2486 return old_addr; 2487 2488 if (hdev->asic_prop.iatu_done_by_fw) 2489 return U64_MAX; 2490 2491 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2492 pci_region.mode = PCI_BAR_MATCH_MODE; 2493 pci_region.bar = DRAM_BAR_ID; 2494 pci_region.addr = addr; 2495 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 2496 if (rc) 2497 return U64_MAX; 2498 2499 if (gaudi2) { 2500 old_addr = gaudi2->dram_bar_cur_addr; 2501 gaudi2->dram_bar_cur_addr = addr; 2502 } 2503 2504 return old_addr; 2505 } 2506 2507 static int gaudi2_init_iatu(struct hl_device *hdev) 2508 { 2509 struct hl_inbound_pci_region inbound_region; 2510 struct hl_outbound_pci_region outbound_region; 2511 u32 bar_addr_low, bar_addr_high; 2512 int rc; 2513 2514 if (hdev->asic_prop.iatu_done_by_fw) 2515 return 0; 2516 2517 /* Temporary inbound Region 0 - Bar 0 - Point to CFG 2518 * We must map this region in BAR match mode in order to 2519 * fetch BAR physical base address 2520 */ 2521 inbound_region.mode = PCI_BAR_MATCH_MODE; 2522 inbound_region.bar = SRAM_CFG_BAR_ID; 2523 /* Base address must be aligned to Bar size which is 256 MB */ 2524 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF; 
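	/*
	 * Editor's note (illustrative reasoning, not from the original comments):
	 * because the BAR-match base must be aligned to the 256MB BAR size, the
	 * temporary window is opened STM_FLASH_ALIGNED_OFF below the STM flash
	 * base. While this mapping is active, config registers are presumably
	 * reachable at an extra STM_FLASH_ALIGNED_OFF offset, which appears to be
	 * why the DBI BAR0/BAR1 readbacks just below add that offset before
	 * inbound region 0 is reprogrammed in address-match mode.
	 */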
2525 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2526 if (rc) 2527 return rc; 2528 2529 /* Fetch physical BAR address */ 2530 bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF); 2531 bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF; 2532 2533 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low; 2534 2535 /* Inbound Region 0 - Bar 0 - Point to CFG */ 2536 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2537 inbound_region.bar = SRAM_CFG_BAR_ID; 2538 inbound_region.offset_in_bar = 0; 2539 inbound_region.addr = STM_FLASH_BASE_ADDR; 2540 inbound_region.size = CFG_REGION_SIZE; 2541 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 2542 if (rc) 2543 return rc; 2544 2545 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */ 2546 inbound_region.mode = PCI_ADDRESS_MATCH_MODE; 2547 inbound_region.bar = SRAM_CFG_BAR_ID; 2548 inbound_region.offset_in_bar = CFG_REGION_SIZE; 2549 inbound_region.addr = BAR0_RSRVD_BASE_ADDR; 2550 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE; 2551 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 2552 if (rc) 2553 return rc; 2554 2555 /* Inbound Region 2 - Bar 4 - Point to DRAM */ 2556 inbound_region.mode = PCI_BAR_MATCH_MODE; 2557 inbound_region.bar = DRAM_BAR_ID; 2558 inbound_region.addr = DRAM_PHYS_BASE; 2559 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 2560 if (rc) 2561 return rc; 2562 2563 /* Outbound Region 0 - Point to Host */ 2564 outbound_region.addr = HOST_PHYS_BASE_0; 2565 outbound_region.size = HOST_PHYS_SIZE_0; 2566 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 2567 2568 return rc; 2569 } 2570 2571 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev) 2572 { 2573 return RREG32(mmHW_STATE); 2574 } 2575 2576 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev) 2577 { 2578 struct asic_fixed_properties *prop = &hdev->asic_prop; 2579 2580 /* 2581 * check for error condition in which number of binning candidates 2582 * is higher than the maximum supported by the driver 2583 */ 2584 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) { 2585 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n", 2586 MAX_CLUSTER_BINNING_FAULTY_TPCS, 2587 hdev->tpc_binning); 2588 return -EINVAL; 2589 } 2590 2591 prop->tpc_binning_mask = hdev->tpc_binning; 2592 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK; 2593 2594 return 0; 2595 } 2596 2597 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev) 2598 { 2599 struct asic_fixed_properties *prop = &hdev->asic_prop; 2600 struct hw_queue_properties *q_props = prop->hw_queues_props; 2601 u64 tpc_binning_mask; 2602 u8 subst_idx = 0; 2603 int i, rc; 2604 2605 rc = gaudi2_tpc_binning_init_prop(hdev); 2606 if (rc) 2607 return rc; 2608 2609 tpc_binning_mask = prop->tpc_binning_mask; 2610 2611 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) { 2612 u8 subst_seq, binned, qid_base; 2613 2614 if (tpc_binning_mask == 0) 2615 break; 2616 2617 if (subst_idx == 0) { 2618 subst_seq = TPC_ID_DCORE0_TPC6; 2619 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 2620 } else { 2621 subst_seq = TPC_ID_DCORE3_TPC5; 2622 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0; 2623 } 2624 2625 2626 /* clear bit from mask */ 2627 binned = __ffs(tpc_binning_mask); 2628 /* 2629 * Coverity complains about possible out-of-bound access in 2630 * clear_bit 2631 */ 2632 if (binned >= TPC_ID_SIZE) { 2633 dev_err(hdev->dev, 2634 "Invalid binned TPC (binning mask: %llx)\n", 
2635 tpc_binning_mask); 2636 return -EINVAL; 2637 } 2638 clear_bit(binned, (unsigned long *)&tpc_binning_mask); 2639 2640 /* also clear replacing TPC bit from enabled mask */ 2641 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask); 2642 2643 /* bin substitute TPC's Qs */ 2644 q_props[qid_base].binned = 1; 2645 q_props[qid_base + 1].binned = 1; 2646 q_props[qid_base + 2].binned = 1; 2647 q_props[qid_base + 3].binned = 1; 2648 2649 subst_idx++; 2650 } 2651 2652 return 0; 2653 } 2654 2655 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev) 2656 { 2657 struct asic_fixed_properties *prop = &hdev->asic_prop; 2658 u8 num_faulty; 2659 2660 num_faulty = hweight32(hdev->decoder_binning); 2661 2662 /* 2663 * check for error condition in which number of binning candidates 2664 * is higher than the maximum supported by the driver 2665 */ 2666 if (num_faulty > MAX_FAULTY_DECODERS) { 2667 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n", 2668 hdev->decoder_binning); 2669 return -EINVAL; 2670 } 2671 2672 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK); 2673 2674 if (prop->decoder_binning_mask) 2675 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1)); 2676 else 2677 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK; 2678 2679 return 0; 2680 } 2681 2682 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev) 2683 { 2684 struct asic_fixed_properties *prop = &hdev->asic_prop; 2685 2686 /* check if we should override default binning */ 2687 if (!hdev->dram_binning) { 2688 prop->dram_binning_mask = 0; 2689 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK; 2690 return; 2691 } 2692 2693 /* set DRAM binning constraints */ 2694 prop->faulty_dram_cluster_map |= hdev->dram_binning; 2695 prop->dram_binning_mask = hdev->dram_binning; 2696 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5); 2697 } 2698 2699 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev) 2700 { 2701 struct asic_fixed_properties *prop = &hdev->asic_prop; 2702 struct hw_queue_properties *q_props; 2703 u8 seq, num_faulty; 2704 2705 num_faulty = hweight32(hdev->edma_binning); 2706 2707 /* 2708 * check for error condition in which number of binning candidates 2709 * is higher than the maximum supported by the driver 2710 */ 2711 if (num_faulty > MAX_FAULTY_EDMAS) { 2712 dev_err(hdev->dev, 2713 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n", 2714 hdev->edma_binning); 2715 return -EINVAL; 2716 } 2717 2718 if (!hdev->edma_binning) { 2719 prop->edma_binning_mask = 0; 2720 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK; 2721 return 0; 2722 } 2723 2724 seq = __ffs((unsigned long)hdev->edma_binning); 2725 2726 /* set binning constraints */ 2727 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]); 2728 prop->edma_binning_mask = hdev->edma_binning; 2729 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1); 2730 2731 /* bin substitute EDMA's queue */ 2732 q_props = prop->hw_queues_props; 2733 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1; 2734 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1; 2735 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1; 2736 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1; 2737 2738 return 0; 2739 } 2740 2741 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask) 2742 { 2743 struct asic_fixed_properties *prop = &hdev->asic_prop; 2744 u8 
num_faulty, seq; 2745 2746 /* check if we should override default binning */ 2747 if (!xbar_edge_iso_mask) { 2748 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK; 2749 return 0; 2750 } 2751 2752 /* 2753 * note that it can be set to a value other than 0 only after the cpucp packet (i.e. 2754 * only the FW can set a redundancy value). For the user it will always be 0. 2755 */ 2756 num_faulty = hweight32(xbar_edge_iso_mask); 2757 2758 /* 2759 * check for error condition in which number of binning candidates 2760 * is higher than the maximum supported by the driver 2761 */ 2762 if (num_faulty > MAX_FAULTY_XBARS) { 2763 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n", 2764 MAX_FAULTY_XBARS); 2765 return -EINVAL; 2766 } 2767 2768 seq = __ffs((unsigned long)xbar_edge_iso_mask); 2769 2770 /* set binning constraints */ 2771 prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]); 2772 prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK; 2773 2774 return 0; 2775 } 2776 2777 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask) 2778 { 2779 int rc; 2780 2781 /* 2782 * mark all clusters as good; each component will "fail" a cluster 2783 * based on eFuse/user values. 2784 * If more than a single cluster is faulty, the chip is unusable 2785 */ 2786 hdev->asic_prop.faulty_dram_cluster_map = 0; 2787 2788 gaudi2_set_dram_binning_masks(hdev); 2789 2790 rc = gaudi2_set_edma_binning_masks(hdev); 2791 if (rc) 2792 return rc; 2793 2794 rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask); 2795 if (rc) 2796 return rc; 2797 2798 2799 /* always initially set to full mask */ 2800 hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK; 2801 2802 return 0; 2803 } 2804 2805 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev) 2806 { 2807 struct asic_fixed_properties *prop = &hdev->asic_prop; 2808 int rc; 2809 2810 rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask); 2811 if (rc) 2812 return rc; 2813 2814 /* if we have DRAM binning reported by FW we should perform cluster config */ 2815 if (prop->faulty_dram_cluster_map) { 2816 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map); 2817 2818 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq]; 2819 } 2820 2821 return 0; 2822 } 2823 2824 static int gaudi2_set_binning_masks(struct hl_device *hdev) 2825 { 2826 int rc; 2827 2828 rc = gaudi2_set_cluster_binning_masks(hdev); 2829 if (rc) 2830 return rc; 2831 2832 rc = gaudi2_set_tpc_binning_masks(hdev); 2833 if (rc) 2834 return rc; 2835 2836 rc = gaudi2_set_dec_binning_masks(hdev); 2837 if (rc) 2838 return rc; 2839 2840 return 0; 2841 } 2842 2843 static int gaudi2_cpucp_info_get(struct hl_device *hdev) 2844 { 2845 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2846 struct asic_fixed_properties *prop = &hdev->asic_prop; 2847 long max_power; 2848 u64 dram_size; 2849 int rc; 2850 2851 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 2852 return 0; 2853 2854 /* No point in asking for this information again when not doing a hard reset, as the device 2855 * CPU hasn't been reset 2856 */ 2857 if (hdev->reset_info.in_compute_reset) 2858 return 0; 2859 2860 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 2861 mmCPU_BOOT_ERR1); 2862 if (rc) 2863 return rc; 2864 2865 dram_size = le64_to_cpu(prop->cpucp_info.dram_size); 2866 if (dram_size) { 2867 /* we can have either 5 or 6 HBMs. 
other values are invalid */ 2868 2869 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) && 2870 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) { 2871 dev_err(hdev->dev, 2872 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n", 2873 dram_size, prop->dram_size); 2874 dram_size = prop->dram_size; 2875 } 2876 2877 prop->dram_size = dram_size; 2878 prop->dram_end_address = prop->dram_base_address + dram_size; 2879 } 2880 2881 if (!strlen(prop->cpucp_info.card_name)) 2882 strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); 2883 2884 /* Overwrite binning masks with the actual binning values from F/W */ 2885 hdev->dram_binning = prop->cpucp_info.dram_binning_mask; 2886 hdev->edma_binning = prop->cpucp_info.edma_binning_mask; 2887 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask); 2888 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask)); 2889 2890 /* 2891 * at this point the DRAM parameters need to be updated according to data obtained 2892 * from the FW 2893 */ 2894 rc = hdev->asic_funcs->set_dram_properties(hdev); 2895 if (rc) 2896 return rc; 2897 2898 rc = hdev->asic_funcs->set_binning_masks(hdev); 2899 if (rc) 2900 return rc; 2901 2902 max_power = hl_fw_get_max_power(hdev); 2903 if (max_power < 0) 2904 return max_power; 2905 2906 prop->max_power_default = (u64) max_power; 2907 2908 return 0; 2909 } 2910 2911 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev) 2912 { 2913 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2914 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS]; 2915 int rc; 2916 2917 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 2918 return 0; 2919 2920 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr); 2921 if (rc) 2922 return rc; 2923 2924 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3]; 2925 2926 return 0; 2927 } 2928 2929 static int gaudi2_early_init(struct hl_device *hdev) 2930 { 2931 struct asic_fixed_properties *prop = &hdev->asic_prop; 2932 struct pci_dev *pdev = hdev->pdev; 2933 resource_size_t pci_bar_size; 2934 int rc; 2935 2936 rc = gaudi2_set_fixed_properties(hdev); 2937 if (rc) 2938 return rc; 2939 2940 /* Check BAR sizes */ 2941 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID); 2942 2943 if (pci_bar_size != CFG_BAR_SIZE) { 2944 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 2945 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE); 2946 rc = -ENODEV; 2947 goto free_queue_props; 2948 } 2949 2950 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID); 2951 if (pci_bar_size != MSIX_BAR_SIZE) { 2952 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 2953 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE); 2954 rc = -ENODEV; 2955 goto free_queue_props; 2956 } 2957 2958 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID); 2959 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID); 2960 2961 /* 2962 * Only in pldm driver config iATU 2963 */ 2964 if (hdev->pldm) 2965 hdev->asic_prop.iatu_done_by_fw = false; 2966 else 2967 hdev->asic_prop.iatu_done_by_fw = true; 2968 2969 rc = hl_pci_init(hdev); 2970 if (rc) 2971 goto free_queue_props; 2972 2973 /* Before continuing in the initialization, we need to read the preboot 2974 * version to determine whether we run with a security-enabled firmware 2975 */ 2976 rc = hl_fw_read_preboot_status(hdev); 2977 if (rc) { 2978 if (hdev->reset_on_preboot_fail) 2979 /* we are already on failure flow, so don't check if hw_fini fails. 
*/ 2980 hdev->asic_funcs->hw_fini(hdev, true, false); 2981 goto pci_fini; 2982 } 2983 2984 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { 2985 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); 2986 rc = hdev->asic_funcs->hw_fini(hdev, true, false); 2987 if (rc) { 2988 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc); 2989 goto pci_fini; 2990 } 2991 } 2992 2993 return 0; 2994 2995 pci_fini: 2996 hl_pci_fini(hdev); 2997 free_queue_props: 2998 kfree(hdev->asic_prop.hw_queues_props); 2999 return rc; 3000 } 3001 3002 static int gaudi2_early_fini(struct hl_device *hdev) 3003 { 3004 kfree(hdev->asic_prop.hw_queues_props); 3005 hl_pci_fini(hdev); 3006 3007 return 0; 3008 } 3009 3010 static bool gaudi2_is_arc_nic_owned(u64 arc_id) 3011 { 3012 switch (arc_id) { 3013 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 3014 return true; 3015 default: 3016 return false; 3017 } 3018 } 3019 3020 static bool gaudi2_is_arc_tpc_owned(u64 arc_id) 3021 { 3022 switch (arc_id) { 3023 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 3024 return true; 3025 default: 3026 return false; 3027 } 3028 } 3029 3030 static void gaudi2_init_arcs(struct hl_device *hdev) 3031 { 3032 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3033 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3034 u64 arc_id; 3035 u32 i; 3036 3037 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) { 3038 if (gaudi2_is_arc_enabled(hdev, i)) 3039 continue; 3040 3041 gaudi2_set_arc_id_cap(hdev, i); 3042 } 3043 3044 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 3045 if (!gaudi2_is_queue_enabled(hdev, i)) 3046 continue; 3047 3048 arc_id = gaudi2_queue_id_to_arc_id[i]; 3049 if (gaudi2_is_arc_enabled(hdev, arc_id)) 3050 continue; 3051 3052 if (gaudi2_is_arc_nic_owned(arc_id) && 3053 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0))) 3054 continue; 3055 3056 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized & 3057 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0))) 3058 continue; 3059 3060 gaudi2_set_arc_id_cap(hdev, arc_id); 3061 } 3062 3063 /* Fetch ARC scratchpad address */ 3064 hdev->asic_prop.engine_core_interrupt_reg_addr = 3065 CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl); 3066 } 3067 3068 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id) 3069 { 3070 u32 reg_base, reg_val; 3071 int rc; 3072 3073 switch (cpu_id) { 3074 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC3: 3075 /* Each ARC scheduler has 2 consecutive DCCM blocks */ 3076 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 3077 ARC_DCCM_BLOCK_SIZE * 2, true); 3078 if (rc) 3079 return rc; 3080 break; 3081 case CPU_ID_SCHED_ARC4: 3082 case CPU_ID_SCHED_ARC5: 3083 case CPU_ID_MME_QMAN_ARC0: 3084 case CPU_ID_MME_QMAN_ARC1: 3085 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 3086 3087 /* Scrub lower DCCM block */ 3088 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 3089 ARC_DCCM_BLOCK_SIZE, true); 3090 if (rc) 3091 return rc; 3092 3093 /* Switch to upper DCCM block */ 3094 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1); 3095 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 3096 3097 /* Scrub upper DCCM block */ 3098 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 3099 ARC_DCCM_BLOCK_SIZE, true); 3100 if (rc) 3101 return rc; 3102 3103 /* Switch to lower DCCM block */ 3104 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0); 3105 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val); 3106 break; 3107 default: 3108 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id], 3109 ARC_DCCM_BLOCK_SIZE, true); 3110 if (rc) 3111 return rc; 3112 } 3113 3114 return 0; 3115 } 3116 3117 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev) 3118 { 3119 u16 arc_id; 3120 int rc; 3121 3122 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) { 3123 if (!gaudi2_is_arc_enabled(hdev, arc_id)) 3124 continue; 3125 3126 rc = gaudi2_scrub_arc_dccm(hdev, arc_id); 3127 if (rc) 3128 return rc; 3129 } 3130 3131 return 0; 3132 } 3133 3134 static int gaudi2_late_init(struct hl_device *hdev) 3135 { 3136 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3137 int rc; 3138 3139 hdev->asic_prop.supports_advanced_cpucp_rc = true; 3140 3141 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 3142 gaudi2->virt_msix_db_dma_addr); 3143 if (rc) { 3144 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 3145 return rc; 3146 } 3147 3148 rc = gaudi2_fetch_psoc_frequency(hdev); 3149 if (rc) { 3150 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 3151 goto disable_pci_access; 3152 } 3153 3154 gaudi2_init_arcs(hdev); 3155 3156 rc = gaudi2_scrub_arcs_dccm(hdev); 3157 if (rc) { 3158 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n"); 3159 goto disable_pci_access; 3160 } 3161 3162 gaudi2_init_security(hdev); 3163 3164 return 0; 3165 3166 disable_pci_access: 3167 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 3168 3169 return rc; 3170 } 3171 3172 static void gaudi2_late_fini(struct hl_device *hdev) 3173 { 3174 hl_hwmon_release_resources(hdev); 3175 } 3176 3177 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx) 3178 { 3179 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 3180 3181 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE); 3182 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE); 3183 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE); 3184 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE); 3185 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE); 3186 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE); 3187 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], 
mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE); 3188 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE); 3189 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE); 3190 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE); 3191 } 3192 3193 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev) 3194 { 3195 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3196 struct user_mapped_block *blocks = gaudi2->mapped_blocks; 3197 u32 block_size, umr_start_idx, num_umr_blocks; 3198 int i; 3199 3200 for (i = 0 ; i < NUM_ARC_CPUS ; i++) { 3201 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3) 3202 block_size = ARC_DCCM_BLOCK_SIZE * 2; 3203 else 3204 block_size = ARC_DCCM_BLOCK_SIZE; 3205 3206 blocks[i].address = gaudi2_arc_dccm_bases[i]; 3207 blocks[i].size = block_size; 3208 } 3209 3210 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE; 3211 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE; 3212 3213 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE; 3214 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE; 3215 3216 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE; 3217 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE; 3218 3219 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE; 3220 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE; 3221 3222 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE; 3223 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE; 3224 3225 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE; 3226 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE; 3227 3228 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE; 3229 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE; 3230 3231 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE; 3232 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE; 3233 3234 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS; 3235 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS; 3236 for (i = 0 ; i < num_umr_blocks ; i++) { 3237 u8 nic_id, umr_block_id; 3238 3239 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS; 3240 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS; 3241 3242 blocks[umr_start_idx + i].address = 3243 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE + 3244 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET + 3245 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET + 3246 umr_block_id * NIC_UMR_OFFSET; 3247 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE; 3248 } 3249 3250 /* Expose decoder HW configuration block to user */ 3251 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX); 3252 3253 for (i = 1; i < NUM_OF_DCORES; ++i) { 3254 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE; 3255 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE; 3256 3257 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address = 3258 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET; 3259 3260 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address = 3261 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET; 3262 } 3263 } 3264 3265 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 3266 { 3267 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 3268 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}; 3269 int i, j, rc = 0; 3270 3271 /* The device ARC works with 32-bits addresses, and because there is a single HW register 3272 * that holds the extension bits (49..28), these bits must be 
identical across the entire allocated 3273 * range. 3274 */ 3275 3276 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 3277 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 3278 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO); 3279 if (!virt_addr_arr[i]) { 3280 rc = -ENOMEM; 3281 goto free_dma_mem_arr; 3282 } 3283 3284 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 3285 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr)) 3286 break; 3287 } 3288 3289 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) { 3290 dev_err(hdev->dev, 3291 "MSB of ARC accessible DMA memory is not identical across the allocated range\n"); 3292 rc = -EFAULT; 3293 goto free_dma_mem_arr; 3294 } 3295 3296 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 3297 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 3298 3299 free_dma_mem_arr: 3300 for (j = 0 ; j < i ; j++) 3301 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 3302 dma_addr_arr[j]); 3303 3304 return rc; 3305 } 3306 3307 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev) 3308 { 3309 struct asic_fixed_properties *prop = &hdev->asic_prop; 3310 struct pci_mem_region *region; 3311 3312 /* CFG */ 3313 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 3314 region->region_base = CFG_BASE; 3315 region->region_size = CFG_SIZE; 3316 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR; 3317 region->bar_size = CFG_BAR_SIZE; 3318 region->bar_id = SRAM_CFG_BAR_ID; 3319 region->used = 1; 3320 3321 /* SRAM */ 3322 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 3323 region->region_base = SRAM_BASE_ADDR; 3324 region->region_size = SRAM_SIZE; 3325 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE; 3326 region->bar_size = CFG_BAR_SIZE; 3327 region->bar_id = SRAM_CFG_BAR_ID; 3328 region->used = 1; 3329 3330 /* DRAM */ 3331 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 3332 region->region_base = DRAM_PHYS_BASE; 3333 region->region_size = hdev->asic_prop.dram_size; 3334 region->offset_in_bar = 0; 3335 region->bar_size = prop->dram_pci_bar_size; 3336 region->bar_id = DRAM_BAR_ID; 3337 region->used = 1; 3338 } 3339 3340 static void gaudi2_user_interrupt_setup(struct hl_device *hdev) 3341 { 3342 struct asic_fixed_properties *prop = &hdev->asic_prop; 3343 int i, j, k; 3344 3345 /* Initialize TPC interrupt */ 3346 HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC); 3347 3348 /* Initialize general purpose interrupt */ 3349 HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0, 3350 HL_USR_INTERRUPT_UNEXPECTED); 3351 3352 /* Initialize common user CQ interrupt */ 3353 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev, 3354 HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ); 3355 3356 /* Initialize common decoder interrupt */ 3357 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev, 3358 HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER); 3359 3360 /* User interrupts structure holds both decoder and user interrupts from various engines. 3361 * We first initialize the decoder interrupts and then we add the user interrupts. 3362 * The only limitation is that the last decoder interrupt id must be smaller 3363 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time. 
3364 */ 3365 3366 /* Initialize decoder interrupts, expose only normal interrupts, 3367 * error interrupts to be handled by driver 3368 */ 3369 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM; 3370 i += 2, j++) 3371 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, 3372 HL_USR_INTERRUPT_DECODER); 3373 3374 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++) 3375 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ); 3376 } 3377 3378 static inline int gaudi2_get_non_zero_random_int(void) 3379 { 3380 int rand = get_random_u32(); 3381 3382 return rand ? rand : 1; 3383 } 3384 3385 static void gaudi2_special_blocks_free(struct hl_device *hdev) 3386 { 3387 struct asic_fixed_properties *prop = &hdev->asic_prop; 3388 struct hl_skip_blocks_cfg *skip_special_blocks_cfg = 3389 &prop->skip_special_blocks_cfg; 3390 3391 kfree(prop->special_blocks); 3392 kfree(skip_special_blocks_cfg->block_types); 3393 kfree(skip_special_blocks_cfg->block_ranges); 3394 } 3395 3396 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev) 3397 { 3398 gaudi2_special_blocks_free(hdev); 3399 } 3400 3401 static bool gaudi2_special_block_skip(struct hl_device *hdev, 3402 struct hl_special_blocks_cfg *special_blocks_cfg, 3403 u32 blk_idx, u32 major, u32 minor, u32 sub_minor) 3404 { 3405 return false; 3406 } 3407 3408 static int gaudi2_special_blocks_config(struct hl_device *hdev) 3409 { 3410 struct asic_fixed_properties *prop = &hdev->asic_prop; 3411 int i, rc; 3412 3413 /* Configure Special blocks */ 3414 prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE; 3415 prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks); 3416 prop->special_blocks = kmalloc_array(prop->num_of_special_blocks, 3417 sizeof(*prop->special_blocks), GFP_KERNEL); 3418 if (!prop->special_blocks) 3419 return -ENOMEM; 3420 3421 for (i = 0 ; i < prop->num_of_special_blocks ; i++) 3422 memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i], 3423 sizeof(*prop->special_blocks)); 3424 3425 /* Configure when to skip Special blocks */ 3426 memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg)); 3427 prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip; 3428 3429 if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) { 3430 prop->skip_special_blocks_cfg.block_types = 3431 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types), 3432 sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL); 3433 if (!prop->skip_special_blocks_cfg.block_types) { 3434 rc = -ENOMEM; 3435 goto free_special_blocks; 3436 } 3437 3438 memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types, 3439 sizeof(gaudi2_iterator_skip_block_types)); 3440 3441 prop->skip_special_blocks_cfg.block_types_len = 3442 ARRAY_SIZE(gaudi2_iterator_skip_block_types); 3443 } 3444 3445 if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) { 3446 prop->skip_special_blocks_cfg.block_ranges = 3447 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges), 3448 sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL); 3449 if (!prop->skip_special_blocks_cfg.block_ranges) { 3450 rc = -ENOMEM; 3451 goto free_skip_special_blocks_types; 3452 } 3453 3454 for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++) 3455 memcpy(&prop->skip_special_blocks_cfg.block_ranges[i], 3456 &gaudi2_iterator_skip_block_ranges[i], 3457 sizeof(struct range)); 3458 3459 prop->skip_special_blocks_cfg.block_ranges_len = 3460 
ARRAY_SIZE(gaudi2_iterator_skip_block_ranges); 3461 } 3462 3463 return 0; 3464 3465 free_skip_special_blocks_types: 3466 kfree(prop->skip_special_blocks_cfg.block_types); 3467 free_special_blocks: 3468 kfree(prop->special_blocks); 3469 3470 return rc; 3471 } 3472 3473 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev) 3474 { 3475 return gaudi2_special_blocks_config(hdev); 3476 } 3477 3478 static int gaudi2_sw_init(struct hl_device *hdev) 3479 { 3480 struct asic_fixed_properties *prop = &hdev->asic_prop; 3481 struct gaudi2_device *gaudi2; 3482 int i, rc; 3483 3484 /* Allocate device structure */ 3485 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL); 3486 if (!gaudi2) 3487 return -ENOMEM; 3488 3489 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) { 3490 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid) 3491 continue; 3492 3493 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) { 3494 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n", 3495 GAUDI2_EVENT_SIZE); 3496 rc = -EINVAL; 3497 goto free_gaudi2_device; 3498 } 3499 3500 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id; 3501 } 3502 3503 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) 3504 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int(); 3505 3506 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get; 3507 3508 hdev->asic_specific = gaudi2; 3509 3510 /* Create DMA pool for small allocations. 3511 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped 3512 * PI/CI registers allocated from this pool have this restriction 3513 */ 3514 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, 3515 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0); 3516 if (!hdev->dma_pool) { 3517 dev_err(hdev->dev, "failed to create DMA pool\n"); 3518 rc = -ENOMEM; 3519 goto free_gaudi2_device; 3520 } 3521 3522 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev); 3523 if (rc) 3524 goto free_dma_pool; 3525 3526 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 3527 if (!hdev->cpu_accessible_dma_pool) { 3528 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n"); 3529 rc = -ENOMEM; 3530 goto free_cpu_dma_mem; 3531 } 3532 3533 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem, 3534 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 3535 if (rc) { 3536 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n"); 3537 rc = -EFAULT; 3538 goto free_cpu_accessible_dma_pool; 3539 } 3540 3541 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size, 3542 &gaudi2->virt_msix_db_dma_addr); 3543 if (!gaudi2->virt_msix_db_cpu_addr) { 3544 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n"); 3545 rc = -ENOMEM; 3546 goto free_cpu_accessible_dma_pool; 3547 } 3548 3549 spin_lock_init(&gaudi2->hw_queues_lock); 3550 3551 gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE, 3552 &gaudi2->scratchpad_bus_address, 3553 GFP_KERNEL | __GFP_ZERO); 3554 if (!gaudi2->scratchpad_kernel_address) { 3555 rc = -ENOMEM; 3556 goto free_virt_msix_db_mem; 3557 } 3558 3559 gaudi2_user_mapped_blocks_init(hdev); 3560 3561 /* Initialize user interrupts */ 3562 gaudi2_user_interrupt_setup(hdev); 3563 3564 hdev->supports_coresight = true; 3565 hdev->supports_sync_stream = true; 3566 hdev->supports_cb_mapping = true; 3567 hdev->supports_wait_for_multi_cs = false; 3568 3569 prop->supports_compute_reset = true; 3570 3571 
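/* As the last steps of SW init, expose the CFG/SRAM/DRAM PCI memory regions and configure the special-blocks iterator */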
hdev->asic_funcs->set_pci_memory_regions(hdev); 3572 3573 rc = gaudi2_special_blocks_iterator_config(hdev); 3574 if (rc) 3575 goto free_scratchpad_mem; 3576 3577 return 0; 3578 3579 free_scratchpad_mem: 3580 hl_asic_dma_pool_free(hdev, gaudi2->scratchpad_kernel_address, 3581 gaudi2->scratchpad_bus_address); 3582 free_virt_msix_db_mem: 3583 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); 3584 free_cpu_accessible_dma_pool: 3585 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 3586 free_cpu_dma_mem: 3587 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 3588 hdev->cpu_accessible_dma_address); 3589 free_dma_pool: 3590 dma_pool_destroy(hdev->dma_pool); 3591 free_gaudi2_device: 3592 kfree(gaudi2); 3593 return rc; 3594 } 3595 3596 static int gaudi2_sw_fini(struct hl_device *hdev) 3597 { 3598 struct asic_fixed_properties *prop = &hdev->asic_prop; 3599 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3600 3601 gaudi2_special_blocks_iterator_free(hdev); 3602 3603 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); 3604 3605 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 3606 3607 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 3608 hdev->cpu_accessible_dma_address); 3609 3610 hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address, 3611 gaudi2->scratchpad_bus_address); 3612 3613 dma_pool_destroy(hdev->dma_pool); 3614 3615 kfree(gaudi2); 3616 3617 return 0; 3618 } 3619 3620 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base) 3621 { 3622 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP | 3623 QM_GLBL_CFG1_CQF_STOP | 3624 QM_GLBL_CFG1_CP_STOP); 3625 3626 /* stop also the ARC */ 3627 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP); 3628 } 3629 3630 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base) 3631 { 3632 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH | 3633 QM_GLBL_CFG1_CQF_FLUSH | 3634 QM_GLBL_CFG1_CP_FLUSH); 3635 } 3636 3637 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base) 3638 { 3639 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH); 3640 } 3641 3642 /** 3643 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters 3644 * 3645 * @hdev: pointer to the habanalabs device structure 3646 * @queue_id: queue for which to clear the fence counters 3647 * @skip_fence: if true, set the maximum fence value to all fence counters to avoid 3648 * getting stuck on any fence value. Otherwise set all fence 3649 * counters to 0 (standard clear of fence counters) 3650 */ 3651 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id, 3652 bool skip_fence) 3653 { 3654 u32 size, reg_base; 3655 u32 addr, val; 3656 3657 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3658 3659 addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET; 3660 size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0; 3661 3662 /* 3663 * in case we want to make sure that a QM that is stuck on a fence will 3664 * be released, we should set the fence counter to a value higher than 3665 * the value the QM is waiting for. To comply with any fence counter of 3666 * any value, we set the maximum fence value to all counters 3667 */ 3668 val = skip_fence ? 
U32_MAX : 0; 3669 gaudi2_memset_device_lbw(hdev, addr, size, val); 3670 } 3671 3672 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id) 3673 { 3674 u32 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3675 3676 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true); 3677 gaudi2_flush_qman_common(hdev, reg_base); 3678 gaudi2_flush_qman_arc_common(hdev, reg_base); 3679 } 3680 3681 static void gaudi2_stop_dma_qmans(struct hl_device *hdev) 3682 { 3683 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3684 int dcore, inst; 3685 3686 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3687 goto stop_edma_qmans; 3688 3689 /* Stop CPs of PDMA QMANs */ 3690 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE); 3691 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE); 3692 3693 stop_edma_qmans: 3694 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3695 return; 3696 3697 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3698 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3699 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3700 u32 qm_base; 3701 3702 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3703 continue; 3704 3705 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3706 inst * DCORE_EDMA_OFFSET; 3707 3708 /* Stop CPs of EDMA QMANs */ 3709 gaudi2_stop_qman_common(hdev, qm_base); 3710 } 3711 } 3712 } 3713 3714 static void gaudi2_stop_mme_qmans(struct hl_device *hdev) 3715 { 3716 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3717 u32 offset, i; 3718 3719 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3720 3721 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 3722 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))) 3723 continue; 3724 3725 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3726 } 3727 } 3728 3729 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev) 3730 { 3731 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3732 u32 reg_base; 3733 int i; 3734 3735 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3736 return; 3737 3738 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3739 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3740 continue; 3741 3742 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3743 gaudi2_stop_qman_common(hdev, reg_base); 3744 } 3745 } 3746 3747 static void gaudi2_stop_rot_qmans(struct hl_device *hdev) 3748 { 3749 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3750 u32 reg_base; 3751 int i; 3752 3753 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3754 return; 3755 3756 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3757 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3758 continue; 3759 3760 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 3761 gaudi2_stop_qman_common(hdev, reg_base); 3762 } 3763 } 3764 3765 static void gaudi2_stop_nic_qmans(struct hl_device *hdev) 3766 { 3767 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3768 u32 reg_base, queue_id; 3769 int i; 3770 3771 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3772 return; 3773 3774 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3775 3776 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3777 if (!(hdev->nic_ports_mask & BIT(i))) 3778 continue; 3779 3780 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3781 gaudi2_stop_qman_common(hdev, reg_base); 3782 } 3783 } 3784 3785 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base) 3786 { 3787 u32 reg_val; 3788 3789 reg_val = 
FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1); 3790 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val); 3791 } 3792 3793 static void gaudi2_dma_stall(struct hl_device *hdev) 3794 { 3795 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3796 int dcore, inst; 3797 3798 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3799 goto stall_edma; 3800 3801 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE); 3802 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE); 3803 3804 stall_edma: 3805 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3806 return; 3807 3808 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3809 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3810 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3811 u32 core_base; 3812 3813 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3814 continue; 3815 3816 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET + 3817 inst * DCORE_EDMA_OFFSET; 3818 3819 /* Stall CPs of EDMA QMANs */ 3820 gaudi2_stall_dma_common(hdev, core_base); 3821 } 3822 } 3823 } 3824 3825 static void gaudi2_mme_stall(struct hl_device *hdev) 3826 { 3827 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3828 u32 offset, i; 3829 3830 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL; 3831 3832 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3833 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3834 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1); 3835 } 3836 3837 static void gaudi2_tpc_stall(struct hl_device *hdev) 3838 { 3839 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3840 u32 reg_base; 3841 int i; 3842 3843 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3844 return; 3845 3846 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3847 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3848 continue; 3849 3850 reg_base = gaudi2_tpc_cfg_blocks_bases[i]; 3851 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1); 3852 } 3853 } 3854 3855 static void gaudi2_rotator_stall(struct hl_device *hdev) 3856 { 3857 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3858 u32 reg_val; 3859 int i; 3860 3861 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3862 return; 3863 3864 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) | 3865 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) | 3866 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1); 3867 3868 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3869 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3870 continue; 3871 3872 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val); 3873 } 3874 } 3875 3876 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base) 3877 { 3878 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0); 3879 } 3880 3881 static void gaudi2_disable_dma_qmans(struct hl_device *hdev) 3882 { 3883 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3884 int dcore, inst; 3885 3886 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK)) 3887 goto stop_edma_qmans; 3888 3889 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE); 3890 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE); 3891 3892 stop_edma_qmans: 3893 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 3894 return; 3895 3896 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 3897 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 3898 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 3899 u32 qm_base; 3900 3901 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq))) 3902 continue; 3903 3904 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET + 3905 inst * 
DCORE_EDMA_OFFSET; 3906 3907 /* Disable CPs of EDMA QMANs */ 3908 gaudi2_disable_qman_common(hdev, qm_base); 3909 } 3910 } 3911 } 3912 3913 static void gaudi2_disable_mme_qmans(struct hl_device *hdev) 3914 { 3915 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3916 u32 offset, i; 3917 3918 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE; 3919 3920 for (i = 0 ; i < NUM_OF_DCORES ; i++) 3921 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)) 3922 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset)); 3923 } 3924 3925 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev) 3926 { 3927 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3928 u32 reg_base; 3929 int i; 3930 3931 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 3932 return; 3933 3934 for (i = 0 ; i < TPC_ID_SIZE ; i++) { 3935 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i))) 3936 continue; 3937 3938 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]]; 3939 gaudi2_disable_qman_common(hdev, reg_base); 3940 } 3941 } 3942 3943 static void gaudi2_disable_rot_qmans(struct hl_device *hdev) 3944 { 3945 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3946 u32 reg_base; 3947 int i; 3948 3949 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK)) 3950 return; 3951 3952 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) { 3953 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i))) 3954 continue; 3955 3956 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]]; 3957 gaudi2_disable_qman_common(hdev, reg_base); 3958 } 3959 } 3960 3961 static void gaudi2_disable_nic_qmans(struct hl_device *hdev) 3962 { 3963 struct gaudi2_device *gaudi2 = hdev->asic_specific; 3964 u32 reg_base, queue_id; 3965 int i; 3966 3967 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 3968 return; 3969 3970 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3971 3972 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3973 if (!(hdev->nic_ports_mask & BIT(i))) 3974 continue; 3975 3976 reg_base = gaudi2_qm_blocks_bases[queue_id]; 3977 gaudi2_disable_qman_common(hdev, reg_base); 3978 } 3979 } 3980 3981 static void gaudi2_enable_timestamp(struct hl_device *hdev) 3982 { 3983 /* Disable the timestamp counter */ 3984 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 3985 3986 /* Zero the lower/upper parts of the 64-bit counter */ 3987 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0); 3988 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0); 3989 3990 /* Enable the counter */ 3991 WREG32(mmPSOC_TIMESTAMP_BASE, 1); 3992 } 3993 3994 static void gaudi2_disable_timestamp(struct hl_device *hdev) 3995 { 3996 /* Disable the timestamp counter */ 3997 WREG32(mmPSOC_TIMESTAMP_BASE, 0); 3998 } 3999 4000 static const char *gaudi2_irq_name(u16 irq_number) 4001 { 4002 switch (irq_number) { 4003 case GAUDI2_IRQ_NUM_EVENT_QUEUE: 4004 return "gaudi2 cpu eq"; 4005 case GAUDI2_IRQ_NUM_COMPLETION: 4006 return "gaudi2 completion"; 4007 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM: 4008 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM]; 4009 case GAUDI2_IRQ_NUM_TPC_ASSERT: 4010 return "gaudi2 tpc assert"; 4011 case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR: 4012 return "gaudi2 unexpected error"; 4013 case GAUDI2_IRQ_NUM_USER_FIRST ... 
GAUDI2_IRQ_NUM_USER_LAST: 4014 return "gaudi2 user completion"; 4015 default: 4016 return "invalid"; 4017 } 4018 } 4019 4020 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num) 4021 { 4022 int i, irq, relative_idx; 4023 struct hl_dec *dec; 4024 4025 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) { 4026 irq = pci_irq_vector(hdev->pdev, i); 4027 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 4028 4029 dec = hdev->dec + relative_idx / 2; 4030 4031 /* We pass different structures depending on the irq handler. For the abnormal 4032 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 4033 * user_interrupt entry 4034 */ 4035 free_irq(irq, ((relative_idx % 2) ? 4036 (void *) dec : 4037 (void *) &hdev->user_interrupt[dec->core_id])); 4038 } 4039 } 4040 4041 static int gaudi2_dec_enable_msix(struct hl_device *hdev) 4042 { 4043 int rc, i, irq_init_cnt, irq, relative_idx; 4044 struct hl_dec *dec; 4045 4046 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0; 4047 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM; 4048 i++, irq_init_cnt++) { 4049 4050 irq = pci_irq_vector(hdev->pdev, i); 4051 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM; 4052 4053 /* We pass different structures depending on the irq handler. For the abnormal 4054 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant 4055 * user_interrupt entry 4056 * 4057 * TODO: change the dec abnrm to threaded irq 4058 */ 4059 4060 dec = hdev->dec + relative_idx / 2; 4061 if (relative_idx % 2) { 4062 rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0, 4063 gaudi2_irq_name(i), (void *) dec); 4064 } else { 4065 rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt, 4066 hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, 4067 gaudi2_irq_name(i), 4068 (void *) &hdev->user_interrupt[dec->core_id]); 4069 } 4070 4071 if (rc) { 4072 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4073 goto free_dec_irqs; 4074 } 4075 } 4076 4077 return 0; 4078 4079 free_dec_irqs: 4080 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt)); 4081 return rc; 4082 } 4083 4084 static int gaudi2_enable_msix(struct hl_device *hdev) 4085 { 4086 struct asic_fixed_properties *prop = &hdev->asic_prop; 4087 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4088 int rc, irq, i, j, user_irq_init_cnt; 4089 struct hl_cq *cq; 4090 4091 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX) 4092 return 0; 4093 4094 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES, 4095 PCI_IRQ_MSIX); 4096 if (rc < 0) { 4097 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n", 4098 GAUDI2_MSIX_ENTRIES, rc); 4099 return rc; 4100 } 4101 4102 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 4103 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 4104 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq); 4105 if (rc) { 4106 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4107 goto free_irq_vectors; 4108 } 4109 4110 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 4111 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE), 4112 &hdev->event_queue); 4113 if (rc) { 4114 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4115 goto free_completion_irq; 4116 } 4117 4118 rc = gaudi2_dec_enable_msix(hdev); 4119 if (rc) { 4120 dev_err(hdev->dev, "Failed to enable decoder IRQ"); 4121 goto free_event_irq; 4122 } 4123 4124 irq = 
pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT); 4125 rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt, 4126 hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, 4127 gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt); 4128 if (rc) { 4129 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4130 goto free_dec_irq; 4131 } 4132 4133 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR); 4134 rc = request_irq(irq, hl_irq_handler_user_interrupt, 0, 4135 gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR), 4136 &hdev->unexpected_error_interrupt); 4137 if (rc) { 4138 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4139 goto free_tpc_irq; 4140 } 4141 4142 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0; 4143 user_irq_init_cnt < prop->user_interrupt_count; 4144 i++, j++, user_irq_init_cnt++) { 4145 4146 irq = pci_irq_vector(hdev->pdev, i); 4147 rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt, 4148 hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, 4149 gaudi2_irq_name(i), &hdev->user_interrupt[j]); 4150 4151 if (rc) { 4152 dev_err(hdev->dev, "Failed to request IRQ %d", irq); 4153 goto free_user_irq; 4154 } 4155 } 4156 4157 gaudi2->hw_cap_initialized |= HW_CAP_MSIX; 4158 4159 return 0; 4160 4161 free_user_irq: 4162 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count; 4163 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) { 4164 4165 irq = pci_irq_vector(hdev->pdev, i); 4166 free_irq(irq, &hdev->user_interrupt[j]); 4167 } 4168 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR); 4169 free_irq(irq, &hdev->unexpected_error_interrupt); 4170 free_tpc_irq: 4171 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT); 4172 free_irq(irq, &hdev->tpc_interrupt); 4173 free_dec_irq: 4174 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1); 4175 free_event_irq: 4176 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 4177 free_irq(irq, &hdev->event_queue); 4178 4179 free_completion_irq: 4180 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 4181 free_irq(irq, cq); 4182 4183 free_irq_vectors: 4184 pci_free_irq_vectors(hdev->pdev); 4185 4186 return rc; 4187 } 4188 4189 static void gaudi2_sync_irqs(struct hl_device *hdev) 4190 { 4191 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4192 int i, j; 4193 int irq; 4194 4195 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 4196 return; 4197 4198 /* Wait for all pending IRQs to be finished */ 4199 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION)); 4200 4201 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) { 4202 irq = pci_irq_vector(hdev->pdev, i); 4203 synchronize_irq(irq); 4204 } 4205 4206 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT)); 4207 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR)); 4208 4209 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count; 4210 i++, j++) { 4211 irq = pci_irq_vector(hdev->pdev, i); 4212 synchronize_irq(irq); 4213 } 4214 4215 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE)); 4216 } 4217 4218 static void gaudi2_disable_msix(struct hl_device *hdev) 4219 { 4220 struct asic_fixed_properties *prop = &hdev->asic_prop; 4221 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4222 struct hl_cq *cq; 4223 int irq, i, j, k; 4224 4225 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX)) 4226 return; 4227 4228 gaudi2_sync_irqs(hdev); 
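/* All pending handlers have finished at this point, so release the IRQs in the same grouping used at setup: event queue, decoders, TPC assert, unexpected error, user interrupts and finally the completion queue, before freeing the MSI-X vectors */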
4229 4230 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 4231 free_irq(irq, &hdev->event_queue); 4232 4233 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); 4234 4235 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT); 4236 free_irq(irq, &hdev->tpc_interrupt); 4237 4238 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR); 4239 free_irq(irq, &hdev->unexpected_error_interrupt); 4240 4241 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0; 4242 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) { 4243 4244 irq = pci_irq_vector(hdev->pdev, i); 4245 free_irq(irq, &hdev->user_interrupt[j]); 4246 } 4247 4248 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); 4249 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; 4250 free_irq(irq, cq); 4251 4252 pci_free_irq_vectors(hdev->pdev); 4253 4254 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX; 4255 } 4256 4257 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id) 4258 { 4259 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1); 4260 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK; 4261 u32 timeout_usec, dec_id, dec_bit, offset, graceful; 4262 int rc; 4263 4264 if (hdev->pldm) 4265 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC; 4266 else 4267 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC; 4268 4269 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 4270 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id; 4271 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4272 continue; 4273 4274 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET; 4275 4276 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0); 4277 4278 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val); 4279 4280 /* Wait till all traffic from decoder stops 4281 * before apply core reset. 4282 */ 4283 rc = hl_poll_timeout( 4284 hdev, 4285 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, 4286 graceful, 4287 (graceful & graceful_pend_mask), 4288 100, 4289 timeout_usec); 4290 if (rc) 4291 dev_err(hdev->dev, 4292 "Failed to stop traffic from DCORE%d Decoder %d\n", 4293 dcore_id, dec_id); 4294 } 4295 } 4296 4297 static void gaudi2_stop_pcie_dec(struct hl_device *hdev) 4298 { 4299 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1); 4300 u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK; 4301 u32 timeout_usec, dec_id, dec_bit, offset, graceful; 4302 int rc; 4303 4304 if (hdev->pldm) 4305 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC; 4306 else 4307 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC; 4308 4309 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 4310 dec_bit = PCIE_DEC_SHIFT + dec_id; 4311 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 4312 continue; 4313 4314 offset = dec_id * PCIE_VDEC_OFFSET; 4315 4316 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0); 4317 4318 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val); 4319 4320 /* Wait till all traffic from decoder stops 4321 * before apply core reset. 
4322 */ 4323 rc = hl_poll_timeout( 4324 hdev, 4325 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, 4326 graceful, 4327 (graceful & graceful_pend_mask), 4328 100, 4329 timeout_usec); 4330 if (rc) 4331 dev_err(hdev->dev, 4332 "Failed to stop traffic from PCIe Decoder %d\n", 4333 dec_id); 4334 } 4335 } 4336 4337 static void gaudi2_stop_dec(struct hl_device *hdev) 4338 { 4339 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4340 int dcore_id; 4341 4342 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0) 4343 return; 4344 4345 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 4346 gaudi2_stop_dcore_dec(hdev, dcore_id); 4347 4348 gaudi2_stop_pcie_dec(hdev); 4349 } 4350 4351 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 4352 { 4353 u32 reg_base, reg_val; 4354 4355 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 4356 if (run_mode == HL_ENGINE_CORE_RUN) 4357 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1); 4358 else 4359 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); 4360 4361 WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val); 4362 } 4363 4364 static void gaudi2_halt_arcs(struct hl_device *hdev) 4365 { 4366 u16 arc_id; 4367 4368 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) { 4369 if (gaudi2_is_arc_enabled(hdev, arc_id)) 4370 gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT); 4371 } 4372 } 4373 4374 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 4375 { 4376 int rc; 4377 u32 reg_base, val, ack_mask, timeout_usec = 100000; 4378 4379 if (hdev->pldm) 4380 timeout_usec *= 100; 4381 4382 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 4383 if (run_mode == HL_ENGINE_CORE_RUN) 4384 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK; 4385 else 4386 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK; 4387 4388 rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET, 4389 val, ((val & ack_mask) == ack_mask), 4390 1000, timeout_usec); 4391 4392 if (!rc) { 4393 /* Clear */ 4394 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0); 4395 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val); 4396 } 4397 4398 return rc; 4399 } 4400 4401 static void gaudi2_reset_arcs(struct hl_device *hdev) 4402 { 4403 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4404 u16 arc_id; 4405 4406 if (!gaudi2) 4407 return; 4408 4409 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) 4410 if (gaudi2_is_arc_enabled(hdev, arc_id)) 4411 gaudi2_clr_arc_id_cap(hdev, arc_id); 4412 } 4413 4414 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev) 4415 { 4416 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4417 u32 queue_id; 4418 int i; 4419 4420 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK)) 4421 return; 4422 4423 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 4424 4425 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 4426 if (!(hdev->nic_ports_mask & BIT(i))) 4427 continue; 4428 4429 gaudi2_qman_manual_flush_common(hdev, queue_id); 4430 } 4431 } 4432 4433 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids, 4434 u32 num_cores, u32 core_command) 4435 { 4436 int i, rc; 4437 4438 for (i = 0 ; i < num_cores ; i++) { 4439 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) 4440 gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command); 4441 } 4442 4443 for (i = 0 ; i < num_cores ; i++) { 4444 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) { 4445 rc = gaudi2_verify_arc_running_mode(hdev, 
core_ids[i], core_command); 4446 4447 if (rc) { 4448 dev_err(hdev->dev, "failed to %s arc: %d\n", 4449 (core_command == HL_ENGINE_CORE_HALT) ? 4450 "HALT" : "RUN", core_ids[i]); 4451 return -1; 4452 } 4453 } 4454 } 4455 4456 return 0; 4457 } 4458 4459 static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command) 4460 { 4461 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4462 u32 reg_base, reg_addr, reg_val, tpc_id; 4463 4464 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK)) 4465 return 0; 4466 4467 tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id]; 4468 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id))) 4469 return 0; 4470 4471 reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id]; 4472 reg_addr = reg_base + TPC_CFG_STALL_OFFSET; 4473 reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK, 4474 !!(engine_command == HL_ENGINE_STALL)); 4475 WREG32(reg_addr, reg_val); 4476 4477 if (engine_command == HL_ENGINE_RESUME) { 4478 reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id]; 4479 reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET; 4480 RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK); 4481 } 4482 4483 return 0; 4484 } 4485 4486 static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command) 4487 { 4488 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4489 u32 reg_base, reg_addr, reg_val, mme_id; 4490 4491 mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id]; 4492 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id))) 4493 return 0; 4494 4495 reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id]; 4496 reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET; 4497 reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK, 4498 !!(engine_command == HL_ENGINE_STALL)); 4499 WREG32(reg_addr, reg_val); 4500 4501 return 0; 4502 } 4503 4504 static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command) 4505 { 4506 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4507 u32 reg_base, reg_addr, reg_val, edma_id; 4508 4509 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK)) 4510 return 0; 4511 4512 edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id]; 4513 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id))) 4514 return 0; 4515 4516 reg_base = gaudi2_dma_core_blocks_bases[edma_id]; 4517 reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET; 4518 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 4519 !!(engine_command == HL_ENGINE_STALL)); 4520 WREG32(reg_addr, reg_val); 4521 4522 if (engine_command == HL_ENGINE_STALL) { 4523 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) | 4524 FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1); 4525 WREG32(reg_addr, reg_val); 4526 } 4527 4528 return 0; 4529 } 4530 4531 static int gaudi2_set_engine_modes(struct hl_device *hdev, 4532 u32 *engine_ids, u32 num_engines, u32 engine_command) 4533 { 4534 int i, rc; 4535 4536 for (i = 0 ; i < num_engines ; ++i) { 4537 switch (engine_ids[i]) { 4538 case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5: 4539 case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5: 4540 case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5: 4541 case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... 
GAUDI2_DCORE3_ENGINE_ID_TPC_5:
			rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
			if (rc)
				return rc;

			break;
		case GAUDI2_DCORE0_ENGINE_ID_MME:
		case GAUDI2_DCORE1_ENGINE_ID_MME:
		case GAUDI2_DCORE2_ENGINE_ID_MME:
		case GAUDI2_DCORE3_ENGINE_ID_MME:
			rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
			if (rc)
				return rc;

			break;
		case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
		case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
		case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
		case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
			rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
			if (rc)
				return rc;

			break;
		default:
			dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
			return -EINVAL;
		}
	}

	return 0;
}

static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
					u32 num_engines, u32 engine_command)
{
	switch (engine_command) {
	case HL_ENGINE_CORE_HALT:
	case HL_ENGINE_CORE_RUN:
		return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);

	case HL_ENGINE_STALL:
	case HL_ENGINE_RESUME:
		return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);

	default:
		dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
		return -EINVAL;
	}
}

static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	if (hdev->pldm)
		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;

	if (fw_reset)
		goto skip_engines;

	gaudi2_stop_dma_qmans(hdev);
	gaudi2_stop_mme_qmans(hdev);
	gaudi2_stop_tpc_qmans(hdev);
	gaudi2_stop_rot_qmans(hdev);
	gaudi2_stop_nic_qmans(hdev);
	msleep(wait_timeout_ms);

	gaudi2_halt_arcs(hdev);
	gaudi2_dma_stall(hdev);
	gaudi2_mme_stall(hdev);
	gaudi2_tpc_stall(hdev);
	gaudi2_rotator_stall(hdev);

	msleep(wait_timeout_ms);

	gaudi2_stop_dec(hdev);

	/*
	 * In case of soft reset, do a manual flush for QMANs (currently called
	 * only for NIC QMANs).
	 */
	if (!hard_reset)
		gaudi2_nic_qmans_manual_flush(hdev);

	gaudi2_disable_dma_qmans(hdev);
	gaudi2_disable_mme_qmans(hdev);
	gaudi2_disable_tpc_qmans(hdev);
	gaudi2_disable_rot_qmans(hdev);
	gaudi2_disable_nic_qmans(hdev);
	gaudi2_disable_timestamp(hdev);

skip_engines:
	if (hard_reset) {
		gaudi2_disable_msix(hdev);
		return;
	}

	gaudi2_sync_irqs(hdev);
}

static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
{
	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;

	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
}

static void gaudi2_init_firmware_loader(struct hl_device *hdev)
{
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	/* fill common fields */
	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
	fw_loader->skip_bmc = false;
	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
	fw_loader->dram_bar_id = DRAM_BAR_ID;
	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;

	/* Here we update initial values for a few specific dynamic regs (as
	 * before reading the first descriptor from FW, those values have to be
	 * hard-coded). In later stages of the protocol those values will be
	 * updated automatically by reading the FW descriptor, so the data there
	 * will always be up-to-date.
	 */
	dynamic_loader = &hdev->fw_loader.dynamic_loader;
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
}

static int gaudi2_init_cpu(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	rc = hl_fw_init_cpu(hdev);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}

static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct cpu_dyn_regs *dyn_regs;
	struct hl_eq *eq;
	u32 status;
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;

	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);

	/* Let the ARC know we are ready as it is now handling those queues */

	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);

	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev,
"Failed to communicate with device CPU (timeout)\n"); 4760 return -EIO; 4761 } 4762 4763 /* update FW application security bits */ 4764 if (prop->fw_cpu_boot_dev_sts0_valid) 4765 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 4766 4767 if (prop->fw_cpu_boot_dev_sts1_valid) 4768 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 4769 4770 gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q; 4771 return 0; 4772 } 4773 4774 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base, 4775 u32 queue_id_base) 4776 { 4777 struct hl_hw_queue *q; 4778 u32 pq_id, pq_offset; 4779 4780 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4781 q = &hdev->kernel_queues[queue_id_base + pq_id]; 4782 pq_offset = pq_id * 4; 4783 4784 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset, 4785 lower_32_bits(q->bus_address)); 4786 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset, 4787 upper_32_bits(q->bus_address)); 4788 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH)); 4789 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0); 4790 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0); 4791 } 4792 } 4793 4794 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base) 4795 { 4796 u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi; 4797 4798 mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4799 mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 4800 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4801 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4802 4803 for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) { 4804 cp_offset = cp_id * 4; 4805 4806 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo); 4807 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi); 4808 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo); 4809 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi); 4810 } 4811 4812 /* allow QMANs to accept work from ARC CQF */ 4813 WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1)); 4814 } 4815 4816 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base, 4817 u32 queue_id_base) 4818 { 4819 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4820 u32 pq_id, pq_offset, so_base_lo, so_base_hi; 4821 4822 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4823 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0); 4824 4825 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) { 4826 pq_offset = pq_id * 4; 4827 4828 /* Configure QMAN HBW to scratchpad as it is not needed */ 4829 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset, 4830 lower_32_bits(gaudi2->scratchpad_bus_address)); 4831 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset, 4832 upper_32_bits(gaudi2->scratchpad_bus_address)); 4833 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset, 4834 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry))); 4835 4836 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0); 4837 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA); 4838 WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo); 4839 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi); 4840 } 4841 4842 /* Enable QMAN H/W completion */ 4843 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << 
PDMA0_QM_PQC_CFG_EN_SHIFT); 4844 } 4845 4846 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base) 4847 { 4848 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4849 u32 sp_reg_addr; 4850 4851 switch (queue_id_base) { 4852 case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3: 4853 fallthrough; 4854 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 4855 fallthrough; 4856 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 4857 fallthrough; 4858 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 4859 fallthrough; 4860 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 4861 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 4862 break; 4863 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 4864 fallthrough; 4865 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 4866 fallthrough; 4867 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 4868 fallthrough; 4869 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 4870 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 4871 break; 4872 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 4873 fallthrough; 4874 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 4875 fallthrough; 4876 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 4877 fallthrough; 4878 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 4879 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 4880 break; 4881 case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3: 4882 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl); 4883 break; 4884 case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3: 4885 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 4886 break; 4887 default: 4888 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base); 4889 return 0; 4890 } 4891 4892 return sp_reg_addr; 4893 } 4894 4895 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base, 4896 u32 queue_id_base) 4897 { 4898 u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset; 4899 int map_table_entry; 4900 4901 WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot); 4902 4903 irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base); 4904 WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset)); 4905 WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset)); 4906 4907 map_table_entry = gaudi2_qman_async_event_id[queue_id_base]; 4908 WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET, 4909 gaudi2_irq_map_table[map_table_entry].cpu_id); 4910 4911 WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK); 4912 4913 WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT); 4914 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0); 4915 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0); 4916 4917 /* Enable the QMAN channel. 4918 * PDMA QMAN configuration is different, as we do not allow user to 4919 * access some of the CPs. 4920 * PDMA0: CP2/3 are reserved for the ARC usage. 4921 * PDMA1: CP1/2/3 are reserved for the ARC usage. 
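	 * The PDMA0_QMAN_ENABLE/PDMA1_QMAN_ENABLE values used below are expected to
	 * enable only the CPs that are not reserved for the ARC.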
4922 */ 4923 if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]) 4924 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE); 4925 else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]) 4926 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE); 4927 else 4928 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE); 4929 } 4930 4931 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base, 4932 u32 queue_id_base) 4933 { 4934 u32 pq_id; 4935 4936 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) 4937 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION; 4938 4939 gaudi2_init_qman_pq(hdev, reg_base, queue_id_base); 4940 gaudi2_init_qman_cp(hdev, reg_base); 4941 gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base); 4942 gaudi2_init_qman_common(hdev, reg_base, queue_id_base); 4943 } 4944 4945 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base, 4946 u32 dma_core_id, bool is_secure) 4947 { 4948 u32 prot, irq_handler_offset; 4949 struct cpu_dyn_regs *dyn_regs; 4950 int map_table_entry; 4951 4952 prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT; 4953 if (is_secure) 4954 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT; 4955 4956 WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot); 4957 4958 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4959 irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 4960 4961 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET, 4962 lower_32_bits(CFG_BASE + irq_handler_offset)); 4963 4964 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET, 4965 upper_32_bits(CFG_BASE + irq_handler_offset)); 4966 4967 map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id]; 4968 WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET, 4969 gaudi2_irq_map_table[map_table_entry].cpu_id); 4970 4971 /* Enable the DMA channel */ 4972 WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT); 4973 } 4974 4975 static void gaudi2_init_kdma(struct hl_device *hdev) 4976 { 4977 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4978 u32 reg_base; 4979 4980 if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA) 4981 return; 4982 4983 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA]; 4984 4985 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true); 4986 4987 gaudi2->hw_cap_initialized |= HW_CAP_KDMA; 4988 } 4989 4990 static void gaudi2_init_pdma(struct hl_device *hdev) 4991 { 4992 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4993 u32 reg_base; 4994 4995 if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK) 4996 return; 4997 4998 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0]; 4999 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false); 5000 5001 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]; 5002 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0); 5003 5004 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1]; 5005 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false); 5006 5007 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]; 5008 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0); 5009 5010 gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK; 5011 } 5012 5013 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq) 5014 { 5015 u32 reg_base, base_edma_core_id, base_edma_qman_id; 5016 5017 base_edma_core_id = DMA_CORE_ID_EDMA0 + seq; 5018 base_edma_qman_id = edma_stream_base[seq]; 5019 5020 reg_base = 
gaudi2_dma_core_blocks_bases[base_edma_core_id]; 5021 gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false); 5022 5023 reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id]; 5024 gaudi2_init_qman(hdev, reg_base, base_edma_qman_id); 5025 } 5026 5027 static void gaudi2_init_edma(struct hl_device *hdev) 5028 { 5029 struct asic_fixed_properties *prop = &hdev->asic_prop; 5030 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5031 int dcore, inst; 5032 5033 if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK) 5034 return; 5035 5036 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 5037 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) { 5038 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst; 5039 5040 if (!(prop->edma_enabled_mask & BIT(seq))) 5041 continue; 5042 5043 gaudi2_init_edma_instance(hdev, seq); 5044 5045 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq); 5046 } 5047 } 5048 } 5049 5050 /* 5051 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell. 5052 * @hdev: pointer to habanalabs device structure. 5053 * @sob_id: sync object ID. 5054 * @first_mon_id: ID of first monitor out of 3 consecutive monitors. 5055 * @interrupt_id: interrupt ID. 5056 * 5057 * Some initiators cannot have HBW address in their completion address registers, and thus cannot 5058 * write directly to the HBW host memory of the virtual MSI-X doorbell. 5059 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write. 5060 * 5061 * The mechanism in the sync manager block is composed of a master monitor with 3 messages. 5062 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next 5063 * completion, by decrementing the sync object value and re-arming the monitor. 5064 */ 5065 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id, 5066 u32 first_mon_id, u32 interrupt_id) 5067 { 5068 u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config; 5069 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5070 u64 addr; 5071 u8 mask; 5072 5073 /* Reset the SOB value */ 5074 sob_offset = sob_id * sizeof(u32); 5075 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 5076 5077 /* Configure 3 monitors: 5078 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor) 5079 * 2. Decrement SOB value by 1. 5080 * 3. Re-arm the master monitor. 
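	 * The 3 monitors are consecutive, at offsets first_mon_offset + 0/4/8 bytes.
	 * The master monitor is configured last and armed only after the two helper
	 * monitors are set up.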
5081 */ 5082 5083 first_mon_offset = first_mon_id * sizeof(u32); 5084 5085 /* 2nd monitor: Decrement SOB value by 1 */ 5086 mon_offset = first_mon_offset + sizeof(u32); 5087 5088 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 5089 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 5090 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 5091 5092 payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */ 5093 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) | 5094 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1); 5095 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 5096 5097 /* 3rd monitor: Re-arm the master monitor */ 5098 mon_offset = first_mon_offset + 2 * sizeof(u32); 5099 5100 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset; 5101 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 5102 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 5103 5104 sob_group = sob_id / 8; 5105 mask = ~BIT(sob_id & 0x7); 5106 mode = 0; /* comparison mode is "greater than or equal to" */ 5107 arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) | 5108 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) | 5109 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) | 5110 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1); 5111 5112 payload = arm; 5113 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 5114 5115 /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */ 5116 mon_offset = first_mon_offset; 5117 5118 config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */ 5119 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config); 5120 5121 addr = gaudi2->virt_msix_db_dma_addr; 5122 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr)); 5123 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr)); 5124 5125 payload = interrupt_id; 5126 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload); 5127 5128 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm); 5129 } 5130 5131 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev) 5132 { 5133 u32 decoder_id, sob_id, first_mon_id, interrupt_id; 5134 struct asic_fixed_properties *prop = &hdev->asic_prop; 5135 5136 /* Decoder normal/abnormal interrupts */ 5137 for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) { 5138 if (!(prop->decoder_enabled_mask & BIT(decoder_id))) 5139 continue; 5140 5141 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 5142 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id; 5143 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id; 5144 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 5145 5146 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 5147 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id; 5148 interrupt_id += 1; 5149 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id); 5150 } 5151 } 5152 5153 static void gaudi2_init_sm(struct hl_device *hdev) 5154 { 5155 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5156 u64 cq_address; 5157 u32 reg_val; 5158 int i; 5159 5160 /* Enable HBW/LBW CQ for completion monitors */ 5161 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 5162 
reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1); 5163 5164 for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++) 5165 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 5166 5167 /* Enable only HBW CQ for KDMA completion monitor */ 5168 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1); 5169 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val); 5170 5171 /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */ 5172 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr)); 5173 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr)); 5174 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION); 5175 5176 for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) { 5177 cq_address = 5178 hdev->completion_queue[i].bus_address; 5179 5180 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i), 5181 lower_32_bits(cq_address)); 5182 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i), 5183 upper_32_bits(cq_address)); 5184 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i), 5185 ilog2(HL_CQ_SIZE_IN_BYTES)); 5186 } 5187 5188 /* Configure kernel ASID and MMU BP*/ 5189 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000); 5190 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0); 5191 5192 /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */ 5193 gaudi2_prepare_sm_for_virt_msix_db(hdev); 5194 } 5195 5196 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base) 5197 { 5198 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5199 u32 reg_val; 5200 int i; 5201 5202 reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0); 5203 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1); 5204 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1); 5205 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1); 5206 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1); 5207 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1); 5208 5209 WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val); 5210 WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF); 5211 5212 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) { 5213 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i); 5214 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]); 5215 } 5216 } 5217 5218 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id, 5219 bool config_qman_only) 5220 { 5221 u32 queue_id_base, reg_base; 5222 5223 switch (dcore_id) { 5224 case 0: 5225 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 5226 break; 5227 case 1: 5228 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 5229 break; 5230 case 2: 5231 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 5232 break; 5233 case 3: 5234 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 5235 break; 5236 default: 5237 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id); 5238 return; 5239 } 5240 5241 if (!config_qman_only) { 5242 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id]; 5243 gaudi2_init_mme_acc(hdev, reg_base); 5244 } 5245 5246 reg_base = gaudi2_qm_blocks_bases[queue_id_base]; 5247 gaudi2_init_qman(hdev, reg_base, queue_id_base); 5248 } 5249 5250 static void gaudi2_init_mme(struct hl_device *hdev) 5251 { 5252 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5253 int i; 5254 5255 if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK) 5256 return; 5257 5258 for (i = 0 ; i < 
NUM_OF_DCORES ; i++) { 5259 gaudi2_init_dcore_mme(hdev, i, false); 5260 5261 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i); 5262 } 5263 } 5264 5265 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base) 5266 { 5267 /* Mask arithmetic and QM interrupts in TPC */ 5268 WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE); 5269 5270 /* Set 16 cache lines */ 5271 WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET, 5272 2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT); 5273 } 5274 5275 struct gaudi2_tpc_init_cfg_data { 5276 enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES]; 5277 }; 5278 5279 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst, 5280 u32 offset, struct iterate_module_ctx *ctx) 5281 { 5282 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5283 struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data; 5284 u32 queue_id_base; 5285 u8 seq; 5286 5287 queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN); 5288 5289 if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1)) 5290 /* gets last sequence number */ 5291 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE; 5292 else 5293 seq = dcore * NUM_OF_TPC_PER_DCORE + inst; 5294 5295 gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset); 5296 gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base); 5297 5298 gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq); 5299 } 5300 5301 static void gaudi2_init_tpc(struct hl_device *hdev) 5302 { 5303 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5304 struct gaudi2_tpc_init_cfg_data init_cfg_data; 5305 struct iterate_module_ctx tpc_iter; 5306 5307 if (!hdev->asic_prop.tpc_enabled_mask) 5308 return; 5309 5310 if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK) 5311 return; 5312 5313 init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0; 5314 init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0; 5315 init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0; 5316 init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0; 5317 tpc_iter.fn = &gaudi2_init_tpc_config; 5318 tpc_iter.data = &init_cfg_data; 5319 gaudi2_iterate_tpcs(hdev, &tpc_iter); 5320 } 5321 5322 static void gaudi2_init_rotator(struct hl_device *hdev) 5323 { 5324 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5325 u32 i, reg_base, queue_id; 5326 5327 queue_id = GAUDI2_QUEUE_ID_ROT_0_0; 5328 5329 for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 5330 reg_base = gaudi2_qm_blocks_bases[queue_id]; 5331 gaudi2_init_qman(hdev, reg_base, queue_id); 5332 5333 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i); 5334 } 5335 } 5336 5337 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id) 5338 { 5339 u32 sob_id; 5340 5341 /* VCMD normal interrupt */ 5342 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id; 5343 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, 5344 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 5345 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 5346 5347 /* VCMD abnormal interrupt */ 5348 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id; 5349 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, 5350 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32)); 5351 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE); 5352 } 5353 5354 static void gaudi2_init_dec(struct hl_device *hdev) 5355 
{ 5356 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5357 u32 dcore_id, dec_id, dec_bit; 5358 u64 base_addr; 5359 5360 if (!hdev->asic_prop.decoder_enabled_mask) 5361 return; 5362 5363 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK) 5364 return; 5365 5366 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 5367 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) { 5368 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id; 5369 5370 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 5371 continue; 5372 5373 base_addr = mmDCORE0_DEC0_CMD_BASE + 5374 BRDG_CTRL_BLOCK_OFFSET + 5375 dcore_id * DCORE_OFFSET + 5376 dec_id * DCORE_VDEC_OFFSET; 5377 5378 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 5379 5380 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 5381 } 5382 5383 for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) { 5384 dec_bit = PCIE_DEC_SHIFT + dec_id; 5385 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit))) 5386 continue; 5387 5388 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET + 5389 dec_id * DCORE_VDEC_OFFSET; 5390 5391 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit); 5392 5393 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit); 5394 } 5395 } 5396 5397 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev, 5398 u32 stlb_base, u32 asid, u64 phys_addr) 5399 { 5400 u32 status, timeout_usec; 5401 int rc; 5402 5403 if (hdev->pldm || !hdev->pdev) 5404 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 5405 else 5406 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 5407 5408 WREG32(stlb_base + STLB_ASID_OFFSET, asid); 5409 WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 5410 WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT); 5411 WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000); 5412 5413 rc = hl_poll_timeout( 5414 hdev, 5415 stlb_base + STLB_BUSY_OFFSET, 5416 status, 5417 !(status & 0x80000000), 5418 1000, 5419 timeout_usec); 5420 5421 if (rc) { 5422 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid); 5423 return rc; 5424 } 5425 5426 return 0; 5427 } 5428 5429 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base, 5430 u32 start_offset, u32 inv_start_val, 5431 u32 flags) 5432 { 5433 /* clear PMMU mem line cache (only needed in mmu range invalidation) */ 5434 if (flags & MMU_OP_CLEAR_MEMCACHE) 5435 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1); 5436 5437 if (flags & MMU_OP_SKIP_LOW_CACHE_INV) 5438 return; 5439 5440 WREG32(stlb_base + start_offset, inv_start_val); 5441 } 5442 5443 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base, 5444 struct gaudi2_cache_invld_params *inv_params) 5445 { 5446 u32 status, timeout_usec, start_offset; 5447 int rc; 5448 5449 timeout_usec = (hdev->pldm) ? 
GAUDI2_PLDM_MMU_TIMEOUT_USEC :
						GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;

	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
		rc = hl_poll_timeout(
			hdev,
			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
			status,
			status & 0x1,
			1000,
			timeout_usec);

		if (rc)
			return rc;

		/* Need to manually reset the status to 0 */
		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
	}

	/* Lower cache does not work with cache lines, hence we can skip its
	 * invalidation upon map and invalidate only upon unmap
	 */
	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
		return 0;

	start_offset = inv_params->range_invalidation ?
			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;

	rc = hl_poll_timeout(
		hdev,
		stlb_base + start_offset,
		status,
		!(status & 0x1),
		1000,
		timeout_usec);

	return rc;
}

bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 hw_cap;

	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);

	if (gaudi2->hw_cap_initialized & hw_cap)
		return true;

	return false;
}

/* this function shall be called only for HMMUs for which the capability bit is set */
static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
{
	u32 offset;

	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
}

static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 start_offset;

	if (inv_params->range_invalidation) {
		/* Set the address range.
		 * Note: the start address we set in the register is not included in
		 * the invalidation range, by design.
		 * That's why we program a lower address than the one we actually
		 * want to be included in the range invalidation.
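		 * For example, to start invalidating at address A, the value programmed
		 * below is (A - 1), split into its [43:12] and [63:44] parts.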
5522 */ 5523 u64 start = inv_params->start_va - 1; 5524 5525 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET; 5526 5527 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET, 5528 start >> MMU_RANGE_INV_VA_LSB_SHIFT); 5529 5530 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET, 5531 start >> MMU_RANGE_INV_VA_MSB_SHIFT); 5532 5533 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET, 5534 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT); 5535 5536 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET, 5537 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT); 5538 } else { 5539 start_offset = STLB_INV_ALL_START_OFFSET; 5540 } 5541 5542 gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset, 5543 inv_params->inv_start_val, inv_params->flags); 5544 } 5545 5546 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev, 5547 int dcore_id, int hmmu_id, 5548 struct gaudi2_cache_invld_params *inv_params) 5549 { 5550 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 5551 5552 gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params); 5553 } 5554 5555 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev, 5556 int dcore_id, int hmmu_id, 5557 struct gaudi2_cache_invld_params *inv_params) 5558 { 5559 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id); 5560 5561 return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params); 5562 } 5563 5564 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev, 5565 struct gaudi2_cache_invld_params *inv_params) 5566 { 5567 int dcore_id, hmmu_id; 5568 5569 /* first send all invalidation commands */ 5570 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 5571 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 5572 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 5573 continue; 5574 5575 gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params); 5576 } 5577 } 5578 5579 /* next, poll all invalidations status */ 5580 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 5581 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) { 5582 int rc; 5583 5584 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id)) 5585 continue; 5586 5587 rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id, 5588 inv_params); 5589 if (rc) 5590 return rc; 5591 } 5592 } 5593 5594 return 0; 5595 } 5596 5597 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 5598 { 5599 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5600 struct gaudi2_cache_invld_params invld_params; 5601 int rc = 0; 5602 5603 if (hdev->reset_info.hard_reset_pending) 5604 return rc; 5605 5606 invld_params.range_invalidation = false; 5607 invld_params.inv_start_val = 1; 5608 5609 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 5610 invld_params.flags = flags; 5611 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 5612 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 5613 &invld_params); 5614 } else if (flags & MMU_OP_PHYS_PACK) { 5615 invld_params.flags = 0; 5616 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 5617 } 5618 5619 return rc; 5620 } 5621 5622 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, 5623 u32 flags, u32 asid, u64 va, u64 size) 5624 { 5625 struct gaudi2_cache_invld_params invld_params = {0}; 5626 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5627 u64 start_va, end_va; 5628 u32 
inv_start_val; 5629 int rc = 0; 5630 5631 if (hdev->reset_info.hard_reset_pending) 5632 return 0; 5633 5634 inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT | 5635 1 << MMU_RANGE_INV_ASID_EN_SHIFT | 5636 asid << MMU_RANGE_INV_ASID_SHIFT); 5637 start_va = va; 5638 end_va = start_va + size; 5639 5640 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 5641 /* As range invalidation does not support zero address we will 5642 * do full invalidation in this case 5643 */ 5644 if (start_va) { 5645 invld_params.range_invalidation = true; 5646 invld_params.start_va = start_va; 5647 invld_params.end_va = end_va; 5648 invld_params.inv_start_val = inv_start_val; 5649 invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE; 5650 } else { 5651 invld_params.range_invalidation = false; 5652 invld_params.inv_start_val = 1; 5653 invld_params.flags = flags; 5654 } 5655 5656 5657 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params); 5658 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE, 5659 &invld_params); 5660 if (rc) 5661 return rc; 5662 5663 } else if (flags & MMU_OP_PHYS_PACK) { 5664 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va); 5665 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va); 5666 invld_params.inv_start_val = inv_start_val; 5667 invld_params.flags = flags; 5668 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params); 5669 } 5670 5671 return rc; 5672 } 5673 5674 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) 5675 { 5676 struct asic_fixed_properties *prop = &hdev->asic_prop; 5677 u64 hop0_addr; 5678 u32 asid, max_asid = prop->max_asid; 5679 int rc; 5680 5681 /* it takes too much time to init all of the ASIDs on palladium */ 5682 if (hdev->pldm) 5683 max_asid = min((u32) 8, max_asid); 5684 5685 for (asid = 0 ; asid < max_asid ; asid++) { 5686 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr; 5687 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr); 5688 if (rc) { 5689 dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid); 5690 return rc; 5691 } 5692 } 5693 5694 return 0; 5695 } 5696 5697 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base) 5698 { 5699 u32 status, timeout_usec; 5700 int rc; 5701 5702 if (hdev->pldm || !hdev->pdev) 5703 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC; 5704 else 5705 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC; 5706 5707 WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1); 5708 5709 rc = hl_poll_timeout( 5710 hdev, 5711 stlb_base + STLB_SRAM_INIT_OFFSET, 5712 status, 5713 !status, 5714 1000, 5715 timeout_usec); 5716 5717 if (rc) 5718 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n"); 5719 5720 rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base); 5721 if (rc) 5722 return rc; 5723 5724 WREG32(mmu_base + MMU_BYPASS_OFFSET, 0); 5725 5726 rc = hl_poll_timeout( 5727 hdev, 5728 stlb_base + STLB_INV_ALL_START_OFFSET, 5729 status, 5730 !status, 5731 1000, 5732 timeout_usec); 5733 5734 if (rc) 5735 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n"); 5736 5737 WREG32(mmu_base + MMU_ENABLE_OFFSET, 1); 5738 5739 return rc; 5740 } 5741 5742 static int gaudi2_pci_mmu_init(struct hl_device *hdev) 5743 { 5744 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5745 u32 mmu_base, stlb_base; 5746 int rc; 5747 5748 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) 5749 return 0; 5750 5751 mmu_base = mmPMMU_HBW_MMU_BASE; 5752 stlb_base = 
mmPMMU_HBW_STLB_BASE; 5753 5754 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5755 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) | 5756 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) | 5757 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) | 5758 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) | 5759 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT), 5760 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5761 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5762 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5763 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5764 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5765 5766 WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0); 5767 5768 if (PAGE_SIZE == SZ_64K) { 5769 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */ 5770 RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5771 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) | 5772 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) | 5773 FIELD_PREP( 5774 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK, 5775 1), 5776 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK | 5777 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK | 5778 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK); 5779 } 5780 5781 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK); 5782 5783 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5784 if (rc) 5785 return rc; 5786 5787 gaudi2->hw_cap_initialized |= HW_CAP_PMMU; 5788 5789 return 0; 5790 } 5791 5792 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id, 5793 int hmmu_id) 5794 { 5795 struct asic_fixed_properties *prop = &hdev->asic_prop; 5796 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5797 u32 offset, mmu_base, stlb_base, hw_cap; 5798 u8 dmmu_seq; 5799 int rc; 5800 5801 dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id; 5802 hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq; 5803 5804 /* 5805 * return if DMMU is already initialized or if it's not out of 5806 * isolation (due to cluster binning) 5807 */ 5808 if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq))) 5809 return 0; 5810 5811 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET); 5812 mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset; 5813 stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset; 5814 5815 RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */, 5816 MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK); 5817 5818 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 5819 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) | 5820 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) | 5821 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) | 5822 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) | 5823 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3), 5824 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK | 5825 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK | 5826 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK | 5827 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK | 5828 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK); 5829 5830 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1, 
5831 STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK); 5832 5833 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK); 5834 5835 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5836 if (rc) 5837 return rc; 5838 5839 gaudi2->hw_cap_initialized |= hw_cap; 5840 5841 return 0; 5842 } 5843 5844 static int gaudi2_hbm_mmu_init(struct hl_device *hdev) 5845 { 5846 int rc, dcore_id, hmmu_id; 5847 5848 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) 5849 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) { 5850 rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id); 5851 if (rc) 5852 return rc; 5853 } 5854 5855 return 0; 5856 } 5857 5858 static int gaudi2_mmu_init(struct hl_device *hdev) 5859 { 5860 int rc; 5861 5862 rc = gaudi2_pci_mmu_init(hdev); 5863 if (rc) 5864 return rc; 5865 5866 rc = gaudi2_hbm_mmu_init(hdev); 5867 if (rc) 5868 return rc; 5869 5870 return 0; 5871 } 5872 5873 static int gaudi2_hw_init(struct hl_device *hdev) 5874 { 5875 struct gaudi2_device *gaudi2 = hdev->asic_specific; 5876 int rc; 5877 5878 /* Let's mark in the H/W that we have reached this point. We check 5879 * this value in the reset_before_init function to understand whether 5880 * we need to reset the chip before doing H/W init. This register is 5881 * cleared by the H/W upon H/W reset 5882 */ 5883 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 5884 5885 /* Perform read from the device to make sure device is up */ 5886 RREG32(mmHW_STATE); 5887 5888 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 5889 * So we set it here and if anyone tries to move it later to 5890 * a different address, there will be an error 5891 */ 5892 if (hdev->asic_prop.iatu_done_by_fw) 5893 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE; 5894 5895 /* 5896 * Before pushing u-boot/linux to device, need to set the hbm bar to 5897 * base address of dram 5898 */ 5899 if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 5900 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n"); 5901 return -EIO; 5902 } 5903 5904 rc = gaudi2_init_cpu(hdev); 5905 if (rc) { 5906 dev_err(hdev->dev, "failed to initialize CPU\n"); 5907 return rc; 5908 } 5909 5910 gaudi2_init_scrambler_hbm(hdev); 5911 gaudi2_init_kdma(hdev); 5912 5913 rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC); 5914 if (rc) { 5915 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc); 5916 return rc; 5917 } 5918 5919 rc = gaudi2->cpucp_info_get(hdev); 5920 if (rc) { 5921 dev_err(hdev->dev, "Failed to get cpucp info\n"); 5922 return rc; 5923 } 5924 5925 rc = gaudi2_mmu_init(hdev); 5926 if (rc) 5927 return rc; 5928 5929 gaudi2_init_pdma(hdev); 5930 gaudi2_init_edma(hdev); 5931 gaudi2_init_sm(hdev); 5932 gaudi2_init_tpc(hdev); 5933 gaudi2_init_mme(hdev); 5934 gaudi2_init_rotator(hdev); 5935 gaudi2_init_dec(hdev); 5936 gaudi2_enable_timestamp(hdev); 5937 5938 rc = gaudi2_coresight_init(hdev); 5939 if (rc) 5940 goto disable_queues; 5941 5942 rc = gaudi2_enable_msix(hdev); 5943 if (rc) 5944 goto disable_queues; 5945 5946 /* Perform read from the device to flush all configuration */ 5947 RREG32(mmHW_STATE); 5948 5949 return 0; 5950 5951 disable_queues: 5952 gaudi2_disable_dma_qmans(hdev); 5953 gaudi2_disable_mme_qmans(hdev); 5954 gaudi2_disable_tpc_qmans(hdev); 5955 gaudi2_disable_rot_qmans(hdev); 5956 gaudi2_disable_nic_qmans(hdev); 5957 5958 gaudi2_disable_timestamp(hdev); 5959 5960 return rc; 5961 } 5962 5963 /** 5964 * gaudi2_send_hard_reset_cmd - common function to handle reset 5965 * 5966 * 
@hdev: pointer to the habanalabs device structure
 *
 * This function handles the various possible scenarios for reset.
 * It considers whether the reset is handled by the driver or by FW, and which FW
 * components are loaded.
 */
static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	bool heartbeat_reset, preboot_only, cpu_initialized = false;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 cpu_boot_status;

	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);

	/*
	 * Handle a corner case where the failure happened while loading the CPU
	 * management app, yet the driver didn't detect any failure while loading the
	 * FW. In such a scenario the driver will send only HALT_MACHINE, and no one
	 * will respond to this request since the FW is already back in preboot and
	 * cannot handle such a command.
	 * In this case, the next time the management app loads it will check the
	 * events register, which will still have the halt indication, and will
	 * reboot the device.
	 * The solution is to let preboot clear all relevant registers before the
	 * next boot, once the driver sends COMMS_RST_DEV.
	 */
	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);

	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
		cpu_initialized = true;

	/*
	 * When Linux/Bootfit exists, this write to the SP can be interpreted in 2 ways:
	 * 1. FW reset: FW initiates the reset sequence
	 * 2. driver reset: FW will start the HALT sequence (the preparations for the
	 *                  reset but not the reset itself, as it is not implemented
	 *                  on its side) and the LKD will wait to let FW complete the
	 *                  sequence before issuing the reset
	 */
	if (!preboot_only && cpu_initialized) {
		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);

		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
	}

	/*
	 * When working with preboot (without Linux/Bootfit) we can
	 * communicate only using the COMMS commands to issue halt/reset.
	 *
	 * For the case in which we are working with Linux/Bootfit, this is a hail-mary
	 * attempt to revive the card in the small chance that the f/w has
	 * experienced a watchdog event, which caused it to return to preboot.
	 * In that case, triggering reset through GIC won't help. We need to
	 * trigger the reset as if Linux wasn't loaded.
	 *
	 * We do it only if the reset cause was HB, because that would be the
	 * indication of such an event.
	 *
	 * In case the watchdog hasn't expired but we still got HB, then this won't
	 * do any damage.
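	 * (A COMMS request is assumed to be handled by preboot itself, so it can
	 * work even when the management FW is no longer running.)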
6028 */ 6029 6030 if (heartbeat_reset || preboot_only || !cpu_initialized) { 6031 if (hdev->asic_prop.hard_reset_done_by_fw) 6032 hl_fw_ask_hard_reset_without_linux(hdev); 6033 else 6034 hl_fw_ask_halt_machine_without_linux(hdev); 6035 } 6036 } 6037 6038 /** 6039 * gaudi2_execute_hard_reset - execute hard reset by driver/FW 6040 * 6041 * @hdev: pointer to the habanalabs device structure 6042 * 6043 * This function executes hard reset based on if driver/FW should do the reset 6044 */ 6045 static void gaudi2_execute_hard_reset(struct hl_device *hdev) 6046 { 6047 if (hdev->asic_prop.hard_reset_done_by_fw) { 6048 gaudi2_send_hard_reset_cmd(hdev); 6049 return; 6050 } 6051 6052 /* Set device to handle FLR by H/W as we will put the device 6053 * CPU to halt mode 6054 */ 6055 WREG32(mmPCIE_AUX_FLR_CTRL, 6056 (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 6057 6058 gaudi2_send_hard_reset_cmd(hdev); 6059 6060 WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1); 6061 } 6062 6063 static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us) 6064 { 6065 int i, rc = 0; 6066 u32 reg_val; 6067 6068 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++) 6069 rc = hl_poll_timeout( 6070 hdev, 6071 mmCPU_RST_STATUS_TO_HOST, 6072 reg_val, 6073 reg_val == CPU_RST_STATUS_SOFT_RST_DONE, 6074 1000, 6075 poll_timeout_us); 6076 6077 if (rc) 6078 dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n", 6079 reg_val); 6080 return rc; 6081 } 6082 6083 /** 6084 * gaudi2_execute_soft_reset - execute soft reset by driver/FW 6085 * 6086 * @hdev: pointer to the habanalabs device structure 6087 * @driver_performs_reset: true if driver should perform reset instead of f/w. 6088 * @poll_timeout_us: time to wait for response from f/w. 6089 * 6090 * This function executes soft reset based on if driver/FW should do the reset 6091 */ 6092 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset, 6093 u32 poll_timeout_us) 6094 { 6095 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 6096 6097 if (!driver_performs_reset) { 6098 /* set SP to indicate reset request sent to FW */ 6099 if (dyn_regs->cpu_rst_status) 6100 WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); 6101 else 6102 WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); 6103 6104 WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), 6105 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); 6106 6107 return gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us); 6108 } 6109 6110 /* Block access to engines, QMANs and SM during reset, these 6111 * RRs will be reconfigured after soft reset. 6112 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset. 
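	 *
	 * For illustration, the two long LBW RRs configured right below cover
	 * (end-points taken from the calls that follow):
	 *
	 *	RR[NUM_LONG_LBW_RR - 1]: mmDCORE0_TPC0_QM_DCCM_BASE .. mmPCIE_MSIX_BASE
	 *	RR[NUM_LONG_LBW_RR - 2]: mmPCIE_MSIX_BASE + HL_BLOCK_SIZE ..
	 *				 mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE
	 *
	 * so only the PCIE_MSIX block between the two ranges stays accessible.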
6113 */ 6114 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1, 6115 mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE); 6116 6117 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2, 6118 mmPCIE_MSIX_BASE + HL_BLOCK_SIZE, 6119 mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE); 6120 6121 WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1); 6122 return 0; 6123 } 6124 6125 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us) 6126 { 6127 int i, rc = 0; 6128 u32 reg_val; 6129 6130 /* We poll the BTM done indication multiple times after reset due to 6131 * a HW errata 'GAUDI2_0300' 6132 */ 6133 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++) 6134 rc = hl_poll_timeout( 6135 hdev, 6136 mmPSOC_GLOBAL_CONF_BTM_FSM, 6137 reg_val, 6138 reg_val == 0, 6139 1000, 6140 poll_timeout_us); 6141 6142 if (rc) 6143 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val); 6144 } 6145 6146 static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 6147 { 6148 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6149 u32 poll_timeout_us, reset_sleep_ms; 6150 bool driver_performs_reset = false; 6151 int rc; 6152 6153 if (hdev->pldm) { 6154 reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC : 6155 GAUDI2_PLDM_SRESET_TIMEOUT_MSEC; 6156 poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC; 6157 } else { 6158 reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC; 6159 poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC; 6160 } 6161 6162 if (fw_reset) 6163 goto skip_reset; 6164 6165 gaudi2_reset_arcs(hdev); 6166 6167 if (hard_reset) { 6168 driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw; 6169 gaudi2_execute_hard_reset(hdev); 6170 } else { 6171 /* 6172 * As we have to support also work with preboot only (which does not supports 6173 * soft reset) we have to make sure that security is disabled before letting driver 6174 * do the reset. user shall control the BFE flags to avoid asking soft reset in 6175 * secured device with preboot only. 6176 */ 6177 driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU && 6178 !hdev->asic_prop.fw_security_enabled); 6179 rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us); 6180 if (rc) 6181 return rc; 6182 } 6183 6184 skip_reset: 6185 if (driver_performs_reset || hard_reset) { 6186 /* 6187 * Instead of waiting for BTM indication we should wait for preboot ready: 6188 * Consider the below scenario: 6189 * 1. FW update is being triggered 6190 * - setting the dirty bit 6191 * 2. hard reset will be triggered due to the dirty bit 6192 * 3. FW initiates the reset: 6193 * - dirty bit cleared 6194 * - BTM indication cleared 6195 * - preboot ready indication cleared 6196 * 4. during hard reset: 6197 * - BTM indication will be set 6198 * - BIST test performed and another reset triggered 6199 * 5. only after this reset the preboot will set the preboot ready 6200 * 6201 * when polling on BTM indication alone we can lose sync with FW while trying to 6202 * communicate with FW that is during reset. 
6203 * to overcome this we will always wait to preboot ready indication 6204 */ 6205 6206 /* without this sleep reset will not work */ 6207 msleep(reset_sleep_ms); 6208 6209 if (hdev->fw_components & FW_TYPE_PREBOOT_CPU) 6210 hl_fw_wait_preboot_ready(hdev); 6211 else 6212 gaudi2_poll_btm_indication(hdev, poll_timeout_us); 6213 } 6214 6215 if (!gaudi2) 6216 return 0; 6217 6218 gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK); 6219 gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK); 6220 6221 /* 6222 * Clear NIC capability mask in order for driver to re-configure 6223 * NIC QMANs. NIC ports will not be re-configured during soft 6224 * reset as we call gaudi2_nic_init only during hard reset 6225 */ 6226 gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK); 6227 6228 if (hard_reset) { 6229 gaudi2->hw_cap_initialized &= 6230 ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK | 6231 HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q | 6232 HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK | 6233 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA | 6234 HW_CAP_MME_MASK | HW_CAP_ROT_MASK); 6235 6236 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat)); 6237 } else { 6238 gaudi2->hw_cap_initialized &= 6239 ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET | 6240 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK | 6241 HW_CAP_ROT_MASK); 6242 } 6243 return 0; 6244 } 6245 6246 static int gaudi2_suspend(struct hl_device *hdev) 6247 { 6248 int rc; 6249 6250 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 6251 if (rc) 6252 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 6253 6254 return rc; 6255 } 6256 6257 static int gaudi2_resume(struct hl_device *hdev) 6258 { 6259 return gaudi2_init_iatu(hdev); 6260 } 6261 6262 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 6263 void *cpu_addr, dma_addr_t dma_addr, size_t size) 6264 { 6265 int rc; 6266 6267 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 6268 VM_DONTCOPY | VM_NORESERVE); 6269 6270 #ifdef _HAS_DMA_MMAP_COHERENT 6271 6272 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); 6273 if (rc) 6274 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 6275 6276 #else 6277 6278 rc = remap_pfn_range(vma, vma->vm_start, 6279 virt_to_phys(cpu_addr) >> PAGE_SHIFT, 6280 size, vma->vm_page_prot); 6281 if (rc) 6282 dev_err(hdev->dev, "remap_pfn_range error %d", rc); 6283 6284 #endif 6285 6286 return rc; 6287 } 6288 6289 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id) 6290 { 6291 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6292 u64 hw_cap_mask = 0; 6293 u64 hw_tpc_cap_bit = 0; 6294 u64 hw_nic_cap_bit = 0; 6295 u64 hw_test_cap_bit = 0; 6296 6297 switch (hw_queue_id) { 6298 case GAUDI2_QUEUE_ID_PDMA_0_0: 6299 case GAUDI2_QUEUE_ID_PDMA_0_1: 6300 case GAUDI2_QUEUE_ID_PDMA_1_0: 6301 hw_cap_mask = HW_CAP_PDMA_MASK; 6302 break; 6303 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 6304 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 6305 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2); 6306 break; 6307 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3: 6308 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE + 6309 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2); 6310 break; 6311 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3: 6312 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE + 6313 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 
2); 6314 break; 6315 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3: 6316 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE + 6317 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2); 6318 break; 6319 6320 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3: 6321 hw_test_cap_bit = HW_CAP_MME_SHIFT; 6322 break; 6323 6324 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3: 6325 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1; 6326 break; 6327 6328 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3: 6329 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2; 6330 break; 6331 6332 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3: 6333 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3; 6334 break; 6335 6336 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3: 6337 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + 6338 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2); 6339 6340 /* special case where cap bit refers to the first queue id */ 6341 if (!hw_tpc_cap_bit) 6342 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0)); 6343 break; 6344 6345 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3: 6346 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE + 6347 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2); 6348 break; 6349 6350 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3: 6351 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) + 6352 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2); 6353 break; 6354 6355 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3: 6356 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) + 6357 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2); 6358 break; 6359 6360 case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3: 6361 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE); 6362 break; 6363 6364 case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3: 6365 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2); 6366 break; 6367 6368 case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3: 6369 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2); 6370 6371 /* special case where cap bit refers to the first queue id */ 6372 if (!hw_nic_cap_bit) 6373 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0)); 6374 break; 6375 6376 case GAUDI2_QUEUE_ID_CPU_PQ: 6377 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q); 6378 6379 default: 6380 return false; 6381 } 6382 6383 if (hw_tpc_cap_bit) 6384 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit)); 6385 6386 if (hw_nic_cap_bit) 6387 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit)); 6388 6389 if (hw_test_cap_bit) 6390 hw_cap_mask = BIT_ULL(hw_test_cap_bit); 6391 6392 return !!(gaudi2->hw_cap_initialized & hw_cap_mask); 6393 } 6394 6395 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id) 6396 { 6397 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6398 6399 switch (arc_id) { 6400 case CPU_ID_SCHED_ARC0 ... 
CPU_ID_SCHED_ARC5: 6401 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 6402 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id)); 6403 6404 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 6405 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 6406 6407 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 6408 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 6409 6410 default: 6411 return false; 6412 } 6413 } 6414 6415 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id) 6416 { 6417 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6418 6419 switch (arc_id) { 6420 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 6421 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 6422 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id)); 6423 break; 6424 6425 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 6426 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)); 6427 break; 6428 6429 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 6430 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)); 6431 break; 6432 6433 default: 6434 return; 6435 } 6436 } 6437 6438 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id) 6439 { 6440 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6441 6442 switch (arc_id) { 6443 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5: 6444 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1: 6445 gaudi2->active_hw_arc |= BIT_ULL(arc_id); 6446 break; 6447 6448 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24: 6449 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0); 6450 break; 6451 6452 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23: 6453 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0); 6454 break; 6455 6456 default: 6457 return; 6458 } 6459 } 6460 6461 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 6462 { 6463 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 6464 u32 pq_offset, reg_base, db_reg_offset, db_value; 6465 6466 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) { 6467 /* 6468 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs. 6469 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ 6470 * number. 
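		 *
		 * Worked example (the queue ID value is illustrative only): for a
		 * H/W queue ID whose two LSBs are 0x2, the internal PQ is 2, so
		 * pq_offset = 2 * 4 = 8 and the write below lands in the QMAN's
		 * PQ_PI_2 register (QM_PQ_PI_0_OFFSET + 8).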
6471 */ 6472 pq_offset = (hw_queue_id & 0x3) * 4; 6473 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 6474 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset; 6475 } else { 6476 db_reg_offset = mmCPU_IF_PF_PQ_PI; 6477 } 6478 6479 db_value = pi; 6480 6481 /* ring the doorbell */ 6482 WREG32(db_reg_offset, db_value); 6483 6484 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) { 6485 /* make sure device CPU will read latest data from host */ 6486 mb(); 6487 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq), 6488 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id); 6489 } 6490 } 6491 6492 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) 6493 { 6494 __le64 *pbd = (__le64 *) bd; 6495 6496 /* The QMANs are on the host memory so a simple copy suffice */ 6497 pqe[0] = pbd[0]; 6498 pqe[1] = pbd[1]; 6499 } 6500 6501 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size, 6502 dma_addr_t *dma_handle, gfp_t flags) 6503 { 6504 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags); 6505 } 6506 6507 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size, 6508 void *cpu_addr, dma_addr_t dma_handle) 6509 { 6510 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle); 6511 } 6512 6513 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, 6514 u32 timeout, u64 *result) 6515 { 6516 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6517 6518 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) { 6519 if (result) 6520 *result = 0; 6521 return 0; 6522 } 6523 6524 if (!timeout) 6525 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC; 6526 6527 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result); 6528 } 6529 6530 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size, 6531 gfp_t mem_flags, dma_addr_t *dma_handle) 6532 { 6533 if (size > GAUDI2_DMA_POOL_BLK_SIZE) 6534 return NULL; 6535 6536 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 6537 } 6538 6539 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr) 6540 { 6541 dma_pool_free(hdev->dma_pool, vaddr, dma_addr); 6542 } 6543 6544 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, 6545 dma_addr_t *dma_handle) 6546 { 6547 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 6548 } 6549 6550 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) 6551 { 6552 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 6553 } 6554 6555 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len, 6556 enum dma_data_direction dir) 6557 { 6558 dma_addr_t dma_addr; 6559 6560 dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir); 6561 if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr))) 6562 return 0; 6563 6564 return dma_addr; 6565 } 6566 6567 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len, 6568 enum dma_data_direction dir) 6569 { 6570 dma_unmap_single(&hdev->pdev->dev, addr, len, dir); 6571 } 6572 6573 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser) 6574 { 6575 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 6576 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6577 6578 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) { 6579 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 6580 return -EINVAL; 6581 } 6582 6583 
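	/*
	 * Note (added for clarity): each range check below requires full
	 * containment, i.e. both the CB start address and its end
	 * (user_cb + user_cb_size) must fall inside the region; a CB that only
	 * straddles a region boundary falls through to the next check and is
	 * ultimately rejected with -EFAULT.
	 */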
/* Just check if CB address is valid */ 6584 6585 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6586 parser->user_cb_size, 6587 asic_prop->sram_user_base_address, 6588 asic_prop->sram_end_address)) 6589 return 0; 6590 6591 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6592 parser->user_cb_size, 6593 asic_prop->dram_user_base_address, 6594 asic_prop->dram_end_address)) 6595 return 0; 6596 6597 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) && 6598 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6599 parser->user_cb_size, 6600 asic_prop->dmmu.start_addr, 6601 asic_prop->dmmu.end_addr)) 6602 return 0; 6603 6604 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) { 6605 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 6606 parser->user_cb_size, 6607 asic_prop->pmmu.start_addr, 6608 asic_prop->pmmu.end_addr) || 6609 hl_mem_area_inside_range( 6610 (u64) (uintptr_t) parser->user_cb, 6611 parser->user_cb_size, 6612 asic_prop->pmmu_huge.start_addr, 6613 asic_prop->pmmu_huge.end_addr)) 6614 return 0; 6615 6616 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) { 6617 if (!hdev->pdev) 6618 return 0; 6619 6620 if (!device_iommu_mapped(&hdev->pdev->dev)) 6621 return 0; 6622 } 6623 6624 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n", 6625 parser->user_cb, parser->user_cb_size); 6626 6627 return -EFAULT; 6628 } 6629 6630 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 6631 { 6632 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6633 6634 if (!parser->is_kernel_allocated_cb) 6635 return gaudi2_validate_cb_address(hdev, parser); 6636 6637 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) { 6638 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n"); 6639 return -EINVAL; 6640 } 6641 6642 return 0; 6643 } 6644 6645 static int gaudi2_send_heartbeat(struct hl_device *hdev) 6646 { 6647 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6648 6649 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6650 return 0; 6651 6652 return hl_fw_send_heartbeat(hdev); 6653 } 6654 6655 /* This is an internal helper function, used to update the KDMA mmu props. 6656 * Should be called with a proper kdma lock. 
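 *
 * Hypothetical usage sketch (the lock name and the restored ASID value are
 * illustrative assumptions, not taken from this file):
 *
 *	mutex_lock(&kdma_lock);
 *	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
 *	rc = gaudi2_send_job_to_kdma(hdev, src_addr, dst_addr, size, false);
 *	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx_asid);
 *	mutex_unlock(&kdma_lock);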
6657 */ 6658 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev, 6659 bool mmu_bypass, u32 asid) 6660 { 6661 u32 rw_asid, rw_mmu_bp; 6662 6663 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 6664 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 6665 6666 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) | 6667 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT); 6668 6669 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid); 6670 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp); 6671 } 6672 6673 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id, 6674 u32 mon_payload, u32 sync_value) 6675 { 6676 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm; 6677 u8 mask; 6678 6679 sob_offset = sob_id * 4; 6680 mon_offset = mon_id * 4; 6681 6682 /* Reset the SOB value */ 6683 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0); 6684 6685 /* Configure this address with CQ_ID 0 because CQ_EN is set */ 6686 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id); 6687 6688 /* Configure this address with CS index because CQ_EN is set */ 6689 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload); 6690 6691 sync_group_id = sob_id / 8; 6692 mask = ~(1 << (sob_id & 0x7)); 6693 mode = 1; /* comparison mode is "equal to" */ 6694 6695 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value); 6696 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode); 6697 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask); 6698 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id); 6699 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm); 6700 } 6701 6702 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */ 6703 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, 6704 u64 src_addr, u64 dst_addr, 6705 u32 size, bool is_memset) 6706 { 6707 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0; 6708 struct hl_cq_entry *cq_base; 6709 struct hl_cq *cq; 6710 u64 comp_addr; 6711 int rc; 6712 6713 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION, 6714 GAUDI2_RESERVED_MON_KDMA_COMPLETION, 6715 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1); 6716 6717 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + 6718 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32)); 6719 6720 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) | 6721 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1); 6722 6723 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr)); 6724 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr)); 6725 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr)); 6726 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr)); 6727 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr)); 6728 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr)); 6729 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val); 6730 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size); 6731 6732 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) | 6733 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1); 6734 6735 if (is_memset) 6736 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1); 6737 6738 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask); 6739 6740 /* Wait for completion */ 6741 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION]; 6742 cq_base = 
cq->kernel_address; 6743 polling_addr = (u32 *)&cq_base[cq->ci]; 6744 6745 if (hdev->pldm) 6746 /* for each 1MB 20 second of timeout */ 6747 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20; 6748 else 6749 timeout = KDMA_TIMEOUT_USEC; 6750 6751 /* Polling */ 6752 rc = hl_poll_timeout_memory( 6753 hdev, 6754 polling_addr, 6755 status, 6756 (status == 1), 6757 1000, 6758 timeout, 6759 true); 6760 6761 *polling_addr = 0; 6762 6763 if (rc) { 6764 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n"); 6765 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT); 6766 return rc; 6767 } 6768 6769 cq->ci = hl_cq_inc_ptr(cq->ci); 6770 6771 return 0; 6772 } 6773 6774 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val) 6775 { 6776 u32 i; 6777 6778 for (i = 0 ; i < size ; i += sizeof(u32)) 6779 WREG32(addr + i, val); 6780 } 6781 6782 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable) 6783 { 6784 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 6785 6786 if (enable) { 6787 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE); 6788 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0); 6789 } else { 6790 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED); 6791 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT); 6792 } 6793 } 6794 6795 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id) 6796 { 6797 u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4; 6798 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset; 6799 u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a; 6800 struct packet_msg_short *msg_short_pkt; 6801 dma_addr_t pkt_dma_addr; 6802 size_t pkt_size; 6803 int rc; 6804 6805 if (hdev->pldm) 6806 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC; 6807 else 6808 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC; 6809 6810 pkt_size = sizeof(*msg_short_pkt); 6811 msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr); 6812 if (!msg_short_pkt) { 6813 dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n", 6814 hw_queue_id); 6815 return -ENOMEM; 6816 } 6817 6818 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) | 6819 (1 << GAUDI2_PKT_CTL_EB_SHIFT) | 6820 (1 << GAUDI2_PKT_CTL_MB_SHIFT) | 6821 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) | 6822 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT); 6823 6824 msg_short_pkt->value = cpu_to_le32(sob_val); 6825 msg_short_pkt->ctl = cpu_to_le32(tmp); 6826 6827 /* Reset the SOB value */ 6828 WREG32(sob_addr, 0); 6829 6830 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr); 6831 if (rc) { 6832 dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n", 6833 hw_queue_id); 6834 goto free_pkt; 6835 } 6836 6837 rc = hl_poll_timeout( 6838 hdev, 6839 sob_addr, 6840 tmp, 6841 (tmp == sob_val), 6842 1000, 6843 timeout_usec); 6844 6845 if (rc == -ETIMEDOUT) { 6846 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n", 6847 hw_queue_id, tmp); 6848 rc = -EIO; 6849 } 6850 6851 /* Reset the SOB value */ 6852 WREG32(sob_addr, 0); 6853 6854 free_pkt: 6855 hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr); 6856 return rc; 6857 } 6858 6859 static int gaudi2_test_cpu_queue(struct hl_device *hdev) 6860 { 6861 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6862 6863 /* 6864 * check capability here as send_cpu_message() won't update the result 6865 * value if no capability 6866 */ 6867 if 
(!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 6868 return 0; 6869 6870 return hl_fw_test_cpu_queue(hdev); 6871 } 6872 6873 static int gaudi2_test_queues(struct hl_device *hdev) 6874 { 6875 int i, rc, ret_val = 0; 6876 6877 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) { 6878 if (!gaudi2_is_queue_enabled(hdev, i)) 6879 continue; 6880 6881 gaudi2_qman_set_test_mode(hdev, i, true); 6882 rc = gaudi2_test_queue(hdev, i); 6883 gaudi2_qman_set_test_mode(hdev, i, false); 6884 6885 if (rc) { 6886 ret_val = -EINVAL; 6887 goto done; 6888 } 6889 } 6890 6891 rc = gaudi2_test_cpu_queue(hdev); 6892 if (rc) { 6893 ret_val = -EINVAL; 6894 goto done; 6895 } 6896 6897 done: 6898 return ret_val; 6899 } 6900 6901 static int gaudi2_compute_reset_late_init(struct hl_device *hdev) 6902 { 6903 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6904 size_t irq_arr_size; 6905 int rc; 6906 6907 gaudi2_init_arcs(hdev); 6908 6909 rc = gaudi2_scrub_arcs_dccm(hdev); 6910 if (rc) { 6911 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n"); 6912 return rc; 6913 } 6914 6915 gaudi2_init_security(hdev); 6916 6917 /* Unmask all IRQs since some could have been received during the soft reset */ 6918 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]); 6919 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size); 6920 } 6921 6922 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 6923 struct engines_data *e) 6924 { 6925 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1; 6926 struct asic_fixed_properties *prop = &hdev->asic_prop; 6927 unsigned long *mask = (unsigned long *) mask_arr; 6928 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n"; 6929 bool is_idle = true, is_eng_idle; 6930 int engine_idx, i, j; 6931 u64 offset; 6932 6933 if (e) 6934 hl_engine_data_sprintf(e, 6935 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n" 6936 "---- ---- ------- ------------ ------------- -------------\n"); 6937 6938 for (i = 0; i < NUM_OF_DCORES; i++) { 6939 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) { 6940 int seq = i * NUM_OF_EDMA_PER_DCORE + j; 6941 6942 if (!(prop->edma_enabled_mask & BIT(seq))) 6943 continue; 6944 6945 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 + 6946 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 6947 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET; 6948 6949 dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset); 6950 dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset); 6951 6952 qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset); 6953 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset); 6954 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset); 6955 6956 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6957 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1); 6958 is_idle &= is_eng_idle; 6959 6960 if (mask && !is_eng_idle) 6961 set_bit(engine_idx, mask); 6962 6963 if (e) 6964 hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? 
"Y" : "N", 6965 qm_glbl_sts0, dma_core_sts0, dma_core_sts1); 6966 } 6967 } 6968 6969 return is_idle; 6970 } 6971 6972 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 6973 struct engines_data *e) 6974 { 6975 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1; 6976 unsigned long *mask = (unsigned long *) mask_arr; 6977 const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n"; 6978 bool is_idle = true, is_eng_idle; 6979 int engine_idx, i; 6980 u64 offset; 6981 6982 if (e) 6983 hl_engine_data_sprintf(e, 6984 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n" 6985 "---- ------- ------------ ------------- -------------\n"); 6986 6987 for (i = 0 ; i < NUM_OF_PDMA ; i++) { 6988 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i; 6989 offset = i * PDMA_OFFSET; 6990 dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset); 6991 dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset); 6992 6993 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset); 6994 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset); 6995 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset); 6996 6997 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 6998 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1); 6999 is_idle &= is_eng_idle; 7000 7001 if (mask && !is_eng_idle) 7002 set_bit(engine_idx, mask); 7003 7004 if (e) 7005 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N", 7006 qm_glbl_sts0, dma_core_sts0, dma_core_sts1); 7007 } 7008 7009 return is_idle; 7010 } 7011 7012 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7013 struct engines_data *e) 7014 { 7015 unsigned long *mask = (unsigned long *) mask_arr; 7016 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n"; 7017 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; 7018 bool is_idle = true, is_eng_idle; 7019 int engine_idx, i; 7020 u64 offset = 0; 7021 7022 /* NIC, twelve macros in Full chip */ 7023 if (e && hdev->nic_ports_mask) 7024 hl_engine_data_sprintf(e, 7025 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 7026 "--- ------- ------------ ----------\n"); 7027 7028 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 7029 if (!(i & 1)) 7030 offset = i / 2 * NIC_OFFSET; 7031 else 7032 offset += NIC_QM_OFFSET; 7033 7034 if (!(hdev->nic_ports_mask & BIT(i))) 7035 continue; 7036 7037 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i; 7038 7039 7040 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 7041 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset); 7042 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 7043 7044 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 7045 is_idle &= is_eng_idle; 7046 7047 if (mask && !is_eng_idle) 7048 set_bit(engine_idx, mask); 7049 7050 if (e) 7051 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? 
"Y" : "N", 7052 qm_glbl_sts0, qm_cgm_sts); 7053 } 7054 7055 return is_idle; 7056 } 7057 7058 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7059 struct engines_data *e) 7060 { 7061 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts; 7062 unsigned long *mask = (unsigned long *) mask_arr; 7063 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n"; 7064 bool is_idle = true, is_eng_idle; 7065 int engine_idx, i; 7066 u64 offset; 7067 7068 if (e) 7069 hl_engine_data_sprintf(e, 7070 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" 7071 "--- ---- ------- ------------ ---------------\n"); 7072 /* MME, one per Dcore */ 7073 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 7074 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET; 7075 offset = i * DCORE_OFFSET; 7076 7077 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset); 7078 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset); 7079 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset); 7080 7081 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 7082 is_idle &= is_eng_idle; 7083 7084 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset); 7085 is_eng_idle &= IS_MME_IDLE(mme_arch_sts); 7086 is_idle &= is_eng_idle; 7087 7088 if (e) 7089 hl_engine_data_sprintf(e, mme_fmt, i, "N", 7090 is_eng_idle ? "Y" : "N", 7091 qm_glbl_sts0, 7092 mme_arch_sts); 7093 7094 if (mask && !is_eng_idle) 7095 set_bit(engine_idx, mask); 7096 } 7097 7098 return is_idle; 7099 } 7100 7101 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset, 7102 struct iterate_module_ctx *ctx) 7103 { 7104 struct gaudi2_tpc_idle_data *idle_data = ctx->data; 7105 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; 7106 bool is_eng_idle; 7107 int engine_idx; 7108 7109 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1))) 7110 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7111 else 7112 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 + 7113 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst; 7114 7115 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset); 7116 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset); 7117 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset); 7118 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset); 7119 7120 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) && 7121 IS_TPC_IDLE(tpc_cfg_sts); 7122 *(idle_data->is_idle) &= is_eng_idle; 7123 7124 if (idle_data->mask && !is_eng_idle) 7125 set_bit(engine_idx, idle_data->mask); 7126 7127 if (idle_data->e) 7128 hl_engine_data_sprintf(idle_data->e, 7129 idle_data->tpc_fmt, dcore, inst, 7130 is_eng_idle ? 
"Y" : "N", 7131 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 7132 } 7133 7134 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7135 struct engines_data *e) 7136 { 7137 struct asic_fixed_properties *prop = &hdev->asic_prop; 7138 unsigned long *mask = (unsigned long *) mask_arr; 7139 bool is_idle = true; 7140 7141 struct gaudi2_tpc_idle_data tpc_idle_data = { 7142 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n", 7143 .e = e, 7144 .mask = mask, 7145 .is_idle = &is_idle, 7146 }; 7147 struct iterate_module_ctx tpc_iter = { 7148 .fn = &gaudi2_is_tpc_engine_idle, 7149 .data = &tpc_idle_data, 7150 }; 7151 7152 if (e && prop->tpc_enabled_mask) 7153 hl_engine_data_sprintf(e, 7154 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS STATUS\n" 7155 "---- --- ------- ------------ ---------- ------\n"); 7156 7157 gaudi2_iterate_tpcs(hdev, &tpc_iter); 7158 7159 return tpc_idle_data.is_idle; 7160 } 7161 7162 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7163 struct engines_data *e) 7164 { 7165 struct asic_fixed_properties *prop = &hdev->asic_prop; 7166 unsigned long *mask = (unsigned long *) mask_arr; 7167 const char *pcie_dec_fmt = "%-10d%-9s%#x\n"; 7168 const char *dec_fmt = "%-6d%-5d%-9s%#x\n"; 7169 bool is_idle = true, is_eng_idle; 7170 u32 dec_swreg15, dec_enabled_bit; 7171 int engine_idx, i, j; 7172 u64 offset; 7173 7174 /* Decoders, two each Dcore and two shared PCIe decoders */ 7175 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) 7176 hl_engine_data_sprintf(e, 7177 "\nCORE DEC is_idle VSI_CMD_SWREG15\n" 7178 "---- --- ------- ---------------\n"); 7179 7180 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 7181 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) { 7182 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j); 7183 if (!(prop->decoder_enabled_mask & dec_enabled_bit)) 7184 continue; 7185 7186 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 + 7187 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j; 7188 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET; 7189 7190 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset); 7191 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 7192 is_idle &= is_eng_idle; 7193 7194 if (mask && !is_eng_idle) 7195 set_bit(engine_idx, mask); 7196 7197 if (e) 7198 hl_engine_data_sprintf(e, dec_fmt, i, j, 7199 is_eng_idle ? "Y" : "N", dec_swreg15); 7200 } 7201 } 7202 7203 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) 7204 hl_engine_data_sprintf(e, 7205 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n" 7206 "-------- ------- ---------------\n"); 7207 7208 /* Check shared(PCIe) decoders */ 7209 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) { 7210 dec_enabled_bit = PCIE_DEC_SHIFT + i; 7211 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit))) 7212 continue; 7213 7214 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i; 7215 offset = i * DCORE_DEC_OFFSET; 7216 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset); 7217 is_eng_idle = IS_DEC_IDLE(dec_swreg15); 7218 is_idle &= is_eng_idle; 7219 7220 if (mask && !is_eng_idle) 7221 set_bit(engine_idx, mask); 7222 7223 if (e) 7224 hl_engine_data_sprintf(e, pcie_dec_fmt, i, 7225 is_eng_idle ? 
"Y" : "N", dec_swreg15); 7226 } 7227 7228 return is_idle; 7229 } 7230 7231 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7232 struct engines_data *e) 7233 { 7234 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n"; 7235 unsigned long *mask = (unsigned long *) mask_arr; 7236 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; 7237 bool is_idle = true, is_eng_idle; 7238 int engine_idx, i; 7239 u64 offset; 7240 7241 if (e) 7242 hl_engine_data_sprintf(e, 7243 "\nCORE ROT is_idle QM_GLBL_STS0 QM_GLBL_STS1 QM_CGM_STS\n" 7244 "---- --- ------- ------------ ------------ ----------\n"); 7245 7246 for (i = 0 ; i < NUM_OF_ROT ; i++) { 7247 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i; 7248 7249 offset = i * ROT_OFFSET; 7250 7251 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset); 7252 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset); 7253 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset); 7254 7255 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 7256 is_idle &= is_eng_idle; 7257 7258 if (mask && !is_eng_idle) 7259 set_bit(engine_idx, mask); 7260 7261 if (e) 7262 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N", 7263 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts); 7264 } 7265 7266 return is_idle; 7267 } 7268 7269 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 7270 struct engines_data *e) 7271 { 7272 bool is_idle = true; 7273 7274 is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e); 7275 is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e); 7276 is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e); 7277 is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e); 7278 is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e); 7279 is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e); 7280 is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e); 7281 7282 return is_idle; 7283 } 7284 7285 static void gaudi2_hw_queues_lock(struct hl_device *hdev) 7286 __acquires(&gaudi2->hw_queues_lock) 7287 { 7288 struct gaudi2_device *gaudi2 = hdev->asic_specific; 7289 7290 spin_lock(&gaudi2->hw_queues_lock); 7291 } 7292 7293 static void gaudi2_hw_queues_unlock(struct hl_device *hdev) 7294 __releases(&gaudi2->hw_queues_lock) 7295 { 7296 struct gaudi2_device *gaudi2 = hdev->asic_specific; 7297 7298 spin_unlock(&gaudi2->hw_queues_lock); 7299 } 7300 7301 static u32 gaudi2_get_pci_id(struct hl_device *hdev) 7302 { 7303 return hdev->pdev->device; 7304 } 7305 7306 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) 7307 { 7308 struct gaudi2_device *gaudi2 = hdev->asic_specific; 7309 7310 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) 7311 return 0; 7312 7313 return hl_fw_get_eeprom_data(hdev, data, max_size); 7314 } 7315 7316 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val) 7317 { 7318 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 7319 } 7320 7321 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7322 { 7323 struct gaudi2_device *gaudi2 = hdev->asic_specific; 7324 7325 if (aggregate) { 7326 *size = (u32) sizeof(gaudi2->events_stat_aggregate); 7327 return gaudi2->events_stat_aggregate; 7328 } 7329 7330 *size = (u32) sizeof(gaudi2->events_stat); 7331 return gaudi2->events_stat; 7332 } 7333 7334 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id, 7335 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 7336 { 7337 u32 offset = 
(mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) * 7338 dcore_vdec_id + DCORE_OFFSET * dcore_id; 7339 7340 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 7341 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 7342 7343 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 7344 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 7345 7346 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 7347 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 7348 7349 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 7350 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 7351 7352 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 7353 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 7354 } 7355 7356 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid) 7357 { 7358 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 7359 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 7360 struct asic_fixed_properties *prop = &hdev->asic_prop; 7361 u32 dcore_offset = dcore_id * DCORE_OFFSET; 7362 u32 vdec_id, i, ports_offset, reg_val; 7363 u8 edma_seq_base; 7364 7365 /* EDMA */ 7366 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE; 7367 if (prop->edma_enabled_mask & BIT(edma_seq_base)) { 7368 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 7369 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 7370 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 7371 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 7372 } 7373 7374 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) { 7375 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 7376 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 7377 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid); 7378 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0); 7379 } 7380 7381 /* Sync Mngr */ 7382 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid); 7383 /* 7384 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID 7385 * for any access type 7386 */ 7387 if (dcore_id > 0) { 7388 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) | 7389 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT); 7390 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val); 7391 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0); 7392 } 7393 7394 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0); 7395 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid); 7396 7397 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) { 7398 ports_offset = i * DCORE_MME_SBTE_OFFSET; 7399 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP + 7400 dcore_offset + ports_offset, 0); 7401 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID + 7402 dcore_offset + ports_offset, rw_asid); 7403 } 7404 7405 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) { 7406 ports_offset = i * DCORE_MME_WB_OFFSET; 7407 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP + 7408 dcore_offset + ports_offset, 0); 7409 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID + 7410 dcore_offset + ports_offset, rw_asid); 7411 } 7412 7413 
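	/*
	 * Note (added for clarity): rw_asid above packs the same ASID into both the
	 * RD and WR fields and is equivalent to the FIELD_PREP() form used in
	 * gaudi2_mmu_shared_prepare():
	 *	FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
	 *	FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid)
	 * The KDMA AXUSER field definitions are reused throughout this function,
	 * which suggests the AXUSER HB_ASID register layout is common to all the
	 * engines configured here.
	 */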
WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0); 7414 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid); 7415 7416 /* 7417 * Decoders 7418 */ 7419 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) { 7420 if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id)) 7421 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0); 7422 } 7423 } 7424 7425 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev, 7426 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp) 7427 { 7428 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id; 7429 7430 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp); 7431 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid); 7432 7433 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp); 7434 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid); 7435 7436 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp); 7437 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid); 7438 7439 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp); 7440 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid); 7441 7442 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp); 7443 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid); 7444 } 7445 7446 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id, 7447 u32 rw_asid, u32 rw_mmu_bp) 7448 { 7449 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id; 7450 7451 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp); 7452 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid); 7453 } 7454 7455 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid) 7456 { 7457 u32 reg_base, reg_offset, reg_val = 0; 7458 7459 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 7460 7461 /* Enable MMU and configure asid for all relevant ARC regions */ 7462 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0); 7463 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid); 7464 7465 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL); 7466 WREG32(reg_base + reg_offset, reg_val); 7467 7468 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW); 7469 WREG32(reg_base + reg_offset, reg_val); 7470 7471 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA); 7472 WREG32(reg_base + reg_offset, reg_val); 7473 7474 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA); 7475 WREG32(reg_base + reg_offset, reg_val); 7476 7477 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA); 7478 WREG32(reg_base + reg_offset, reg_val); 7479 7480 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE); 7481 WREG32(reg_base + reg_offset, reg_val); 7482 7483 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL); 7484 WREG32(reg_base + reg_offset, reg_val); 7485 7486 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL); 7487 WREG32(reg_base + reg_offset, reg_val); 7488 7489 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL); 7490 WREG32(reg_base + reg_offset, reg_val); 7491 7492 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL); 7493 WREG32(reg_base + reg_offset, reg_val); 7494 7495 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL); 7496 WREG32(reg_base + 
reg_offset, reg_val); 7497 } 7498 7499 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid) 7500 { 7501 int i; 7502 7503 if (hdev->fw_components & FW_TYPE_BOOT_CPU) 7504 return hl_fw_cpucp_engine_core_asid_set(hdev, asid); 7505 7506 for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++) 7507 gaudi2_arc_mmu_prepare(hdev, i, asid); 7508 7509 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) { 7510 if (!gaudi2_is_queue_enabled(hdev, i)) 7511 continue; 7512 7513 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid); 7514 } 7515 7516 return 0; 7517 } 7518 7519 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid) 7520 { 7521 struct asic_fixed_properties *prop = &hdev->asic_prop; 7522 u32 rw_asid, offset; 7523 int rc, i; 7524 7525 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) | 7526 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid); 7527 7528 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 7529 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 7530 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid); 7531 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0); 7532 7533 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid); 7534 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0); 7535 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid); 7536 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0); 7537 7538 /* ROT */ 7539 for (i = 0 ; i < NUM_OF_ROT ; i++) { 7540 offset = i * ROT_OFFSET; 7541 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid); 7542 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 7543 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK); 7544 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK); 7545 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK); 7546 } 7547 7548 /* Shared Decoders are the last bits in the decoders mask */ 7549 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0)) 7550 gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0); 7551 7552 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1)) 7553 gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0); 7554 7555 /* arc farm arc dup eng */ 7556 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++) 7557 gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0); 7558 7559 rc = gaudi2_arc_mmu_prepare_all(hdev, asid); 7560 if (rc) 7561 return rc; 7562 7563 return 0; 7564 } 7565 7566 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset, 7567 struct iterate_module_ctx *ctx) 7568 { 7569 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data; 7570 7571 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0); 7572 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid); 7573 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0); 7574 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid); 7575 } 7576 7577 /* zero the MMUBP and set the ASID */ 7578 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid) 7579 { 7580 struct gaudi2_device *gaudi2 = hdev->asic_specific; 7581 struct gaudi2_tpc_mmu_data tpc_mmu_data; 7582 struct iterate_module_ctx tpc_iter = { 7583 .fn = &gaudi2_tpc_mmu_prepare, 7584 .data = &tpc_mmu_data, 7585 }; 7586 int rc, i; 7587 7588 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) { 7589 dev_crit(hdev->dev, "asid %u is too big\n", asid); 7590 return -EINVAL; 7591 } 7592 7593 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK)) 7594 
return 0; 7595 7596 rc = gaudi2_mmu_shared_prepare(hdev, asid); 7597 if (rc) 7598 return rc; 7599 7600 /* configure DCORE MMUs */ 7601 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) | 7602 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT); 7603 gaudi2_iterate_tpcs(hdev, &tpc_iter); 7604 for (i = 0 ; i < NUM_OF_DCORES ; i++) 7605 gaudi2_mmu_dcore_prepare(hdev, i, asid); 7606 7607 return 0; 7608 } 7609 7610 static inline bool is_info_event(u32 event) 7611 { 7612 switch (event) { 7613 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: 7614 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 7615 7616 /* return in case of NIC status event - these events are received periodically and not as 7617 * an indication to an error. 7618 */ 7619 case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1: 7620 return true; 7621 default: 7622 return false; 7623 } 7624 } 7625 7626 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type, 7627 bool ratelimited, const char *fmt, ...) 7628 { 7629 struct va_format vaf; 7630 va_list args; 7631 7632 va_start(args, fmt); 7633 vaf.fmt = fmt; 7634 vaf.va = &args; 7635 7636 if (ratelimited) 7637 dev_err_ratelimited(hdev->dev, "%s: %pV\n", 7638 gaudi2_irq_map_table[event_type].valid ? 7639 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf); 7640 else 7641 dev_err(hdev->dev, "%s: %pV\n", 7642 gaudi2_irq_map_table[event_type].valid ? 7643 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf); 7644 7645 va_end(args); 7646 } 7647 7648 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7649 struct hl_eq_ecc_data *ecc_data) 7650 { 7651 u64 ecc_address = 0, ecc_syndrom = 0; 7652 u8 memory_wrapper_idx = 0; 7653 7654 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7655 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7656 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7657 7658 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, 7659 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. 
critical %u.", 7660 ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical); 7661 7662 return !!ecc_data->is_critical; 7663 } 7664 7665 /* 7666 * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 7667 * 7668 * @idx: the current pi/ci value 7669 * @q_len: the queue length (power of 2) 7670 * 7671 * @return the cyclically decremented index 7672 */ 7673 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len) 7674 { 7675 u32 mask = q_len - 1; 7676 7677 /* 7678 * modular decrement is equivalent to adding (queue_size -1) 7679 * later we take LSBs to make sure the value is in the 7680 * range [0, queue_len - 1] 7681 */ 7682 return (idx + q_len - 1) & mask; 7683 } 7684 7685 /** 7686 * gaudi2_print_sw_config_stream_data - print SW config stream data 7687 * 7688 * @hdev: pointer to the habanalabs device structure 7689 * @stream: the QMAN's stream 7690 * @qman_base: base address of QMAN registers block 7691 */ 7692 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev, 7693 u32 stream, u64 qman_base) 7694 { 7695 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 7696 u32 cq_ptr_lo_off, size; 7697 7698 cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0; 7699 7700 cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) + 7701 stream * cq_ptr_lo_off; 7702 7703 cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 7704 7705 cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0); 7706 7707 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 7708 size = RREG32(cq_tsize); 7709 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n", 7710 stream, cq_ptr, size); 7711 } 7712 7713 /** 7714 * gaudi2_print_last_pqes_on_err - print last PQEs on error 7715 * 7716 * @hdev: pointer to the habanalabs device structure 7717 * @qid_base: first QID of the QMAN (out of 4 streams) 7718 * @stream: the QMAN's stream 7719 * @qman_base: base address of QMAN registers block 7720 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 7721 */ 7722 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, 7723 u64 qman_base, bool pr_sw_conf) 7724 { 7725 u32 ci, qm_ci_stream_off; 7726 struct hl_hw_queue *q; 7727 u64 pq_ci; 7728 int i; 7729 7730 q = &hdev->kernel_queues[qid_base + stream]; 7731 7732 qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0; 7733 pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) + 7734 stream * qm_ci_stream_off; 7735 7736 hdev->asic_funcs->hw_queues_lock(hdev); 7737 7738 if (pr_sw_conf) 7739 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); 7740 7741 ci = RREG32(pq_ci); 7742 7743 /* we should start printing form ci -1 */ 7744 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 7745 7746 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 7747 struct hl_bd *bd; 7748 u64 addr; 7749 u32 len; 7750 7751 bd = q->kernel_address; 7752 bd += ci; 7753 7754 len = le32_to_cpu(bd->len); 7755 /* len 0 means uninitialized entry- break */ 7756 if (!len) 7757 break; 7758 7759 addr = le64_to_cpu(bd->ptr); 7760 7761 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n", 7762 stream, ci, addr, len); 7763 7764 /* get previous ci, wrap if needed */ 7765 ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH); 7766 } 7767 7768 hdev->asic_funcs->hw_queues_unlock(hdev); 7769 } 7770 7771 /** 7772 * print_qman_data_on_err - extract QMAN data on 
error 7773 * 7774 * @hdev: pointer to the habanalabs device structure 7775 * @qid_base: first QID of the QMAN (out of 4 streams) 7776 * @stream: the QMAN's stream 7777 * @qman_base: base address of QMAN registers block 7778 * 7779 * This function attempt to extract as much data as possible on QMAN error. 7780 * On upper CP print the SW config stream data and last 8 PQEs. 7781 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 7782 */ 7783 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base) 7784 { 7785 u32 i; 7786 7787 if (stream != QMAN_STREAMS) { 7788 gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true); 7789 return; 7790 } 7791 7792 gaudi2_print_sw_config_stream_data(hdev, stream, qman_base); 7793 7794 for (i = 0 ; i < QMAN_STREAMS ; i++) 7795 gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false); 7796 } 7797 7798 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type, 7799 u64 qman_base, u32 qid_base) 7800 { 7801 u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0; 7802 u64 glbl_sts_addr, arb_err_addr; 7803 char reg_desc[32]; 7804 7805 glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE); 7806 arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE); 7807 7808 /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */ 7809 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 7810 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 7811 7812 if (!glbl_sts_val) 7813 continue; 7814 7815 if (i == QMAN_STREAMS) { 7816 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 7817 num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE; 7818 } else { 7819 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 7820 num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE; 7821 } 7822 7823 for (j = 0 ; j < num_error_causes ; j++) 7824 if (glbl_sts_val & BIT(j)) { 7825 gaudi2_print_event(hdev, event_type, true, 7826 "%s. err cause: %s", reg_desc, 7827 i == QMAN_STREAMS ? 7828 gaudi2_qman_lower_cp_error_cause[j] : 7829 gaudi2_qman_error_cause[j]); 7830 error_count++; 7831 } 7832 7833 print_qman_data_on_err(hdev, qid_base, i, qman_base); 7834 } 7835 7836 arb_err_val = RREG32(arb_err_addr); 7837 7838 if (!arb_err_val) 7839 goto out; 7840 7841 for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 7842 if (arb_err_val & BIT(j)) { 7843 gaudi2_print_event(hdev, event_type, true, 7844 "ARB_ERR. 
err cause: %s", 7845 gaudi2_qman_arb_error_cause[j]); 7846 error_count++; 7847 } 7848 } 7849 7850 out: 7851 return error_count; 7852 } 7853 7854 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, 7855 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7856 enum gaudi2_engine_id id, u64 *event_mask) 7857 { 7858 u32 razwi_hi, razwi_lo, razwi_xy; 7859 u16 eng_id = id; 7860 u8 rd_wr_flag; 7861 7862 if (is_write) { 7863 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI); 7864 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO); 7865 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY); 7866 rd_wr_flag = HL_RAZWI_WRITE; 7867 } else { 7868 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI); 7869 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO); 7870 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY); 7871 rd_wr_flag = HL_RAZWI_READ; 7872 } 7873 7874 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1, 7875 rd_wr_flag | HL_RAZWI_HBW, event_mask); 7876 7877 dev_err_ratelimited(hdev->dev, 7878 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n", 7879 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy); 7880 } 7881 7882 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, 7883 u64 rtr_mstr_if_base_addr, bool is_write, char *name, 7884 enum gaudi2_engine_id id, u64 *event_mask) 7885 { 7886 u64 razwi_addr = CFG_BASE; 7887 u32 razwi_xy; 7888 u16 eng_id = id; 7889 u8 rd_wr_flag; 7890 7891 if (is_write) { 7892 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI); 7893 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY); 7894 rd_wr_flag = HL_RAZWI_WRITE; 7895 } else { 7896 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI); 7897 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY); 7898 rd_wr_flag = HL_RAZWI_READ; 7899 } 7900 7901 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask); 7902 dev_err_ratelimited(hdev->dev, 7903 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n", 7904 name, is_write ? 
"WR" : "RD", rtr_mstr_if_base_addr, razwi_addr, 7905 razwi_xy); 7906 } 7907 7908 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev, 7909 enum razwi_event_sources module, u8 module_idx) 7910 { 7911 switch (module) { 7912 case RAZWI_TPC: 7913 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES)) 7914 return GAUDI2_DCORE0_ENGINE_ID_TPC_6; 7915 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7916 (module_idx % NUM_OF_TPC_PER_DCORE) + 7917 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 7918 7919 case RAZWI_MME: 7920 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) + 7921 (module_idx * ENGINE_ID_DCORE_OFFSET)); 7922 7923 case RAZWI_EDMA: 7924 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7925 (module_idx % NUM_OF_EDMA_PER_DCORE)); 7926 7927 case RAZWI_PDMA: 7928 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx); 7929 7930 case RAZWI_NIC: 7931 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx)); 7932 7933 case RAZWI_DEC: 7934 if (module_idx == 8) 7935 return GAUDI2_PCIE_ENGINE_ID_DEC_0; 7936 7937 if (module_idx == 9) 7938 return GAUDI2_PCIE_ENGINE_ID_DEC_1; 7939 ; 7940 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) + 7941 (module_idx % NUM_OF_DEC_PER_DCORE) + 7942 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)); 7943 7944 case RAZWI_ROT: 7945 return GAUDI2_ENGINE_ID_ROT_0 + module_idx; 7946 7947 default: 7948 return GAUDI2_ENGINE_ID_SIZE; 7949 } 7950 } 7951 7952 /* 7953 * This function handles RR(Range register) hit events. 7954 * raised be initiators not PSOC RAZWI. 7955 */ 7956 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, 7957 enum razwi_event_sources module, u8 module_idx, 7958 u8 module_sub_idx, u64 *event_mask) 7959 { 7960 bool via_sft = false; 7961 u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id; 7962 u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr; 7963 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0; 7964 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0; 7965 char initiator_name[64]; 7966 7967 switch (module) { 7968 case RAZWI_TPC: 7969 hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx]; 7970 7971 if (hl_is_fw_ver_below_1_9(hdev) && 7972 !hdev->asic_prop.fw_security_enabled && 7973 ((module_idx == 0) || (module_idx == 1))) 7974 lbw_rtr_id = DCORE0_RTR0; 7975 else 7976 lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx]; 7977 sprintf(initiator_name, "TPC_%u", module_idx); 7978 break; 7979 case RAZWI_MME: 7980 sprintf(initiator_name, "MME_%u", module_idx); 7981 switch (module_sub_idx) { 7982 case MME_WAP0: 7983 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0; 7984 break; 7985 case MME_WAP1: 7986 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1; 7987 break; 7988 case MME_WRITE: 7989 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write; 7990 break; 7991 case MME_READ: 7992 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read; 7993 break; 7994 case MME_SBTE0: 7995 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0; 7996 break; 7997 case MME_SBTE1: 7998 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1; 7999 break; 8000 case MME_SBTE2: 8001 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2; 8002 break; 8003 case MME_SBTE3: 8004 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3; 8005 break; 8006 case MME_SBTE4: 8007 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4; 8008 break; 8009 default: 8010 
return; 8011 } 8012 lbw_rtr_id = hbw_rtr_id; 8013 break; 8014 case RAZWI_EDMA: 8015 hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx]; 8016 dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE; 8017 /* SFT has separate MSTR_IF for LBW, only there we can 8018 * read the LBW razwi related registers 8019 */ 8020 lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE + 8021 dcore_id * SFT_DCORE_OFFSET; 8022 via_sft = true; 8023 sprintf(initiator_name, "EDMA_%u", module_idx); 8024 break; 8025 case RAZWI_PDMA: 8026 hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx]; 8027 lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx]; 8028 sprintf(initiator_name, "PDMA_%u", module_idx); 8029 break; 8030 case RAZWI_NIC: 8031 hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx]; 8032 lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx]; 8033 sprintf(initiator_name, "NIC_%u", module_idx); 8034 break; 8035 case RAZWI_DEC: 8036 hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx]; 8037 lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx]; 8038 sprintf(initiator_name, "DEC_%u", module_idx); 8039 break; 8040 case RAZWI_ROT: 8041 hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx]; 8042 lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx]; 8043 sprintf(initiator_name, "ROT_%u", module_idx); 8044 break; 8045 default: 8046 return; 8047 } 8048 8049 /* Find router mstr_if register base */ 8050 if (!via_sft) { 8051 dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE; 8052 dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE; 8053 hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE + 8054 dcore_id * DCORE_OFFSET + 8055 dcore_rtr_id * DCORE_RTR_OFFSET + 8056 RTR_MSTR_IF_OFFSET; 8057 lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr + 8058 (((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET); 8059 } 8060 8061 /* Find out event cause by reading "RAZWI_HAPPENED" registers */ 8062 hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED); 8063 hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED); 8064 lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED); 8065 lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED); 8066 8067 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx); 8068 if (hbw_shrd_aw) { 8069 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true, 8070 initiator_name, eng_id, event_mask); 8071 8072 /* Clear event indication */ 8073 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw); 8074 } 8075 8076 if (hbw_shrd_ar) { 8077 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false, 8078 initiator_name, eng_id, event_mask); 8079 8080 /* Clear event indication */ 8081 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar); 8082 } 8083 8084 if (lbw_shrd_aw) { 8085 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true, 8086 initiator_name, eng_id, event_mask); 8087 8088 /* Clear event indication */ 8089 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw); 8090 } 8091 8092 if (lbw_shrd_ar) { 8093 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false, 8094 initiator_name, eng_id, event_mask); 8095 8096 /* Clear event indication */ 8097 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar); 8098 } 8099 } 8100 8101 static void gaudi2_check_if_razwi_happened(struct 
hl_device *hdev) 8102 { 8103 struct asic_fixed_properties *prop = &hdev->asic_prop; 8104 u8 mod_idx, sub_mod; 8105 8106 /* check all TPCs */ 8107 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) { 8108 if (prop->tpc_enabled_mask & BIT(mod_idx)) 8109 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL); 8110 } 8111 8112 /* check all MMEs */ 8113 for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 8114 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++) 8115 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx, 8116 sub_mod, NULL); 8117 8118 /* check all EDMAs */ 8119 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++) 8120 if (prop->edma_enabled_mask & BIT(mod_idx)) 8121 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL); 8122 8123 /* check all PDMAs */ 8124 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++) 8125 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL); 8126 8127 /* check all NICs */ 8128 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++) 8129 if (hdev->nic_ports_mask & BIT(mod_idx)) 8130 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0, 8131 NULL); 8132 8133 /* check all DECs */ 8134 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++) 8135 if (prop->decoder_enabled_mask & BIT(mod_idx)) 8136 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL); 8137 8138 /* check all ROTs */ 8139 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++) 8140 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL); 8141 } 8142 8143 static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size, 8144 u32 axuser_xy, u32 *base, u16 *eng_id, 8145 char *eng_name) 8146 { 8147 8148 int i, num_of_eng = 0; 8149 u16 str_size = 0; 8150 8151 for (i = 0 ; i < array_size ; i++) { 8152 if (axuser_xy != razwi_info[i].axuser_xy) 8153 continue; 8154 8155 eng_id[num_of_eng] = razwi_info[i].eng_id; 8156 base[num_of_eng] = razwi_info[i].rtr_ctrl; 8157 if (!num_of_eng) 8158 str_size += snprintf(eng_name + str_size, 8159 PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s", 8160 razwi_info[i].eng_name); 8161 else 8162 str_size += snprintf(eng_name + str_size, 8163 PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s", 8164 razwi_info[i].eng_name); 8165 num_of_eng++; 8166 } 8167 8168 return num_of_eng; 8169 } 8170 8171 static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg, 8172 u64 *event_mask) 8173 { 8174 u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0; 8175 u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR]; 8176 u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR]; 8177 char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE]; 8178 bool razwi_happened = false; 8179 int i; 8180 8181 num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info), 8182 axuser_xy, base, eng_id, eng_name_str); 8183 8184 /* If no match for XY coordinates, try to find it in MME razwi table */ 8185 if (!num_of_eng) { 8186 axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg); 8187 num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info, 8188 ARRAY_SIZE(mme_razwi_info), 8189 axuser_xy, base, eng_id, 8190 eng_name_str); 8191 } 8192 8193 for (i = 0 ; i < num_of_eng ; i++) { 8194 if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) { 8195 addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI); 8196 addr_lo = RREG32(base[i] + 
DEC_RAZWI_HBW_AW_ADDR_LO); 8197 dev_err(hdev->dev, 8198 "PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n", 8199 eng_name_str, ((u64)addr_hi << 32) + addr_lo); 8200 hl_handle_razwi(hdev, ((u64)addr_hi << 32) + addr_lo, &eng_id[0], 8201 num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask); 8202 razwi_happened = true; 8203 } 8204 8205 if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) { 8206 addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI); 8207 addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO); 8208 dev_err(hdev->dev, 8209 "PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n", 8210 eng_name_str, ((u64)addr_hi << 32) + addr_lo); 8211 hl_handle_razwi(hdev, ((u64)addr_hi << 32) + addr_lo, &eng_id[0], 8212 num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask); 8213 razwi_happened = true; 8214 } 8215 8216 if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) { 8217 addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR); 8218 dev_err(hdev->dev, 8219 "PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n", 8220 eng_name_str, addr_lo); 8221 hl_handle_razwi(hdev, addr_lo, &eng_id[0], 8222 num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask); 8223 razwi_happened = true; 8224 } 8225 8226 if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) { 8227 addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR); 8228 dev_err(hdev->dev, 8229 "PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n", 8230 eng_name_str, addr_lo); 8231 hl_handle_razwi(hdev, addr_lo, &eng_id[0], 8232 num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask); 8233 razwi_happened = true; 8234 } 8235 /* In common case the loop will break, when there is only one engine id, or 8236 * several engines with the same router. The exceptional case is with psoc razwi 8237 * from EDMA, where it's possible to get axuser id which fits 2 routers (2 8238 * interfaces of sft router). In this case, maybe the first router won't hold info 8239 * and we will need to iterate on the other router. 
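* For example (illustrative): an EDMA AXUSER id that matches both SFT interfaces yields num_of_eng == 2; if none of the four *_SET capture registers behind base[0] are set, razwi_happened stays false and the next iteration checks base[1].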
8240 */ 8241 if (razwi_happened) 8242 break; 8243 } 8244 8245 return razwi_happened; 8246 } 8247 8248 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */ 8249 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask) 8250 { 8251 u32 razwi_mask_info, razwi_intr = 0, error_count = 0; 8252 8253 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) { 8254 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT); 8255 if (!razwi_intr) 8256 return 0; 8257 } 8258 8259 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO); 8260 8261 dev_err_ratelimited(hdev->dev, 8262 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", 8263 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info), 8264 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info), 8265 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info), 8266 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info), 8267 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info)); 8268 8269 if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask)) 8270 error_count++; 8271 else 8272 dev_err_ratelimited(hdev->dev, 8273 "PSOC RAZWI interrupt: invalid razwi info (0x%x)\n", 8274 razwi_mask_info); 8275 8276 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */ 8277 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) 8278 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr); 8279 8280 return error_count; 8281 } 8282 8283 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type) 8284 { 8285 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 8286 8287 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET); 8288 8289 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) { 8290 if (sts_val & BIT(i)) { 8291 gaudi2_print_event(hdev, event_type, true, 8292 "err cause: %s", gaudi2_qm_sei_error_cause[i]); 8293 sts_clr_val |= BIT(i); 8294 error_count++; 8295 } 8296 } 8297 8298 WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val); 8299 8300 return error_count; 8301 } 8302 8303 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type, 8304 bool extended_err_check, u64 *event_mask) 8305 { 8306 enum razwi_event_sources module; 8307 u32 error_count = 0; 8308 u64 qman_base; 8309 u8 index; 8310 8311 switch (event_type) { 8312 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... 
GAUDI2_EVENT_TPC23_AXI_ERR_RSP: 8313 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 8314 qman_base = mmDCORE0_TPC0_QM_BASE + 8315 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET + 8316 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET; 8317 module = RAZWI_TPC; 8318 break; 8319 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 8320 qman_base = mmDCORE0_TPC6_QM_BASE; 8321 module = RAZWI_TPC; 8322 break; 8323 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: 8324 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: 8325 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: 8326 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: 8327 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) / 8328 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - 8329 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); 8330 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET; 8331 module = RAZWI_MME; 8332 break; 8333 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 8334 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 8335 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP; 8336 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET; 8337 module = RAZWI_PDMA; 8338 break; 8339 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 8340 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 8341 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 8342 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET; 8343 module = RAZWI_ROT; 8344 break; 8345 default: 8346 return 0; 8347 } 8348 8349 error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 8350 8351 /* There is a single event per NIC macro, so should check its both QMAN blocks */ 8352 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE && 8353 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE) 8354 error_count += _gaudi2_handle_qm_sei_err(hdev, 8355 qman_base + NIC_QM_OFFSET, event_type); 8356 8357 if (extended_err_check) { 8358 /* check if RAZWI happened */ 8359 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask); 8360 hl_check_for_glbl_errors(hdev); 8361 } 8362 8363 return error_count; 8364 } 8365 8366 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 8367 { 8368 u32 qid_base, error_count = 0; 8369 u64 qman_base; 8370 u8 index = 0; 8371 8372 switch (event_type) { 8373 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM: 8374 index = event_type - GAUDI2_EVENT_TPC0_QM; 8375 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS; 8376 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 8377 break; 8378 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM: 8379 index = event_type - GAUDI2_EVENT_TPC6_QM; 8380 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS; 8381 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 8382 break; 8383 case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM: 8384 index = event_type - GAUDI2_EVENT_TPC12_QM; 8385 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS; 8386 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 8387 break; 8388 case GAUDI2_EVENT_TPC18_QM ... 
GAUDI2_EVENT_TPC23_QM: 8389 index = event_type - GAUDI2_EVENT_TPC18_QM; 8390 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS; 8391 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET; 8392 break; 8393 case GAUDI2_EVENT_TPC24_QM: 8394 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0; 8395 qman_base = mmDCORE0_TPC6_QM_BASE; 8396 break; 8397 case GAUDI2_EVENT_MME0_QM: 8398 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0; 8399 qman_base = mmDCORE0_MME_QM_BASE; 8400 break; 8401 case GAUDI2_EVENT_MME1_QM: 8402 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0; 8403 qman_base = mmDCORE1_MME_QM_BASE; 8404 break; 8405 case GAUDI2_EVENT_MME2_QM: 8406 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0; 8407 qman_base = mmDCORE2_MME_QM_BASE; 8408 break; 8409 case GAUDI2_EVENT_MME3_QM: 8410 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0; 8411 qman_base = mmDCORE3_MME_QM_BASE; 8412 break; 8413 case GAUDI2_EVENT_HDMA0_QM: 8414 index = 0; 8415 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0; 8416 qman_base = mmDCORE0_EDMA0_QM_BASE; 8417 break; 8418 case GAUDI2_EVENT_HDMA1_QM: 8419 index = 1; 8420 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0; 8421 qman_base = mmDCORE0_EDMA1_QM_BASE; 8422 break; 8423 case GAUDI2_EVENT_HDMA2_QM: 8424 index = 2; 8425 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0; 8426 qman_base = mmDCORE1_EDMA0_QM_BASE; 8427 break; 8428 case GAUDI2_EVENT_HDMA3_QM: 8429 index = 3; 8430 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0; 8431 qman_base = mmDCORE1_EDMA1_QM_BASE; 8432 break; 8433 case GAUDI2_EVENT_HDMA4_QM: 8434 index = 4; 8435 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0; 8436 qman_base = mmDCORE2_EDMA0_QM_BASE; 8437 break; 8438 case GAUDI2_EVENT_HDMA5_QM: 8439 index = 5; 8440 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0; 8441 qman_base = mmDCORE2_EDMA1_QM_BASE; 8442 break; 8443 case GAUDI2_EVENT_HDMA6_QM: 8444 index = 6; 8445 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0; 8446 qman_base = mmDCORE3_EDMA0_QM_BASE; 8447 break; 8448 case GAUDI2_EVENT_HDMA7_QM: 8449 index = 7; 8450 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0; 8451 qman_base = mmDCORE3_EDMA1_QM_BASE; 8452 break; 8453 case GAUDI2_EVENT_PDMA0_QM: 8454 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0; 8455 qman_base = mmPDMA0_QM_BASE; 8456 break; 8457 case GAUDI2_EVENT_PDMA1_QM: 8458 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0; 8459 qman_base = mmPDMA1_QM_BASE; 8460 break; 8461 case GAUDI2_EVENT_ROTATOR0_ROT0_QM: 8462 qid_base = GAUDI2_QUEUE_ID_ROT_0_0; 8463 qman_base = mmROT0_QM_BASE; 8464 break; 8465 case GAUDI2_EVENT_ROTATOR1_ROT1_QM: 8466 qid_base = GAUDI2_QUEUE_ID_ROT_1_0; 8467 qman_base = mmROT1_QM_BASE; 8468 break; 8469 default: 8470 return 0; 8471 } 8472 8473 error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base); 8474 8475 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */ 8476 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) { 8477 error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type); 8478 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask); 8479 } 8480 8481 hl_check_for_glbl_errors(hdev); 8482 8483 return error_count; 8484 } 8485 8486 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type) 8487 { 8488 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 8489 8490 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS); 8491 8492 for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) { 8493 if (sts_val & BIT(i)) { 8494 gaudi2_print_event(hdev, event_type, true, 8495 "err cause: %s", 
gaudi2_arc_sei_error_cause[i]); 8496 sts_clr_val |= BIT(i); 8497 error_count++; 8498 } 8499 } 8500 8501 hl_check_for_glbl_errors(hdev); 8502 8503 WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val); 8504 8505 return error_count; 8506 } 8507 8508 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type) 8509 { 8510 u32 i, sts_val, sts_clr_val = 0, error_count = 0; 8511 8512 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS); 8513 8514 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) { 8515 if (sts_val & BIT(i)) { 8516 gaudi2_print_event(hdev, event_type, true, 8517 "err cause: %s", gaudi2_cpu_sei_error_cause[i]); 8518 sts_clr_val |= BIT(i); 8519 error_count++; 8520 } 8521 } 8522 8523 hl_check_for_glbl_errors(hdev); 8524 8525 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val); 8526 8527 return error_count; 8528 } 8529 8530 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type, 8531 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 8532 u64 *event_mask) 8533 { 8534 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 8535 u32 error_count = 0; 8536 int i; 8537 8538 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++) 8539 if (intr_cause_data & BIT(i)) { 8540 gaudi2_print_event(hdev, event_type, true, 8541 "err cause: %s", guadi2_rot_error_cause[i]); 8542 error_count++; 8543 } 8544 8545 /* check if RAZWI happened */ 8546 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask); 8547 hl_check_for_glbl_errors(hdev); 8548 8549 return error_count; 8550 } 8551 8552 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type, 8553 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause, 8554 u64 *event_mask) 8555 { 8556 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data); 8557 u32 error_count = 0; 8558 int i; 8559 8560 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++) 8561 if (intr_cause_data & BIT(i)) { 8562 gaudi2_print_event(hdev, event_type, true, 8563 "interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]); 8564 error_count++; 8565 } 8566 8567 /* check if RAZWI happened */ 8568 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask); 8569 hl_check_for_glbl_errors(hdev); 8570 8571 return error_count; 8572 } 8573 8574 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type, 8575 u64 *event_mask) 8576 { 8577 u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0; 8578 int i; 8579 8580 if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES) 8581 /* DCORE DEC */ 8582 sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR + 8583 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) + 8584 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE); 8585 else 8586 /* PCIE DEC */ 8587 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET * 8588 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES); 8589 8590 sts_val = RREG32(sts_addr); 8591 8592 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) { 8593 if (sts_val & BIT(i)) { 8594 gaudi2_print_event(hdev, event_type, true, 8595 "err cause: %s", gaudi2_dec_error_cause[i]); 8596 sts_clr_val |= BIT(i); 8597 error_count++; 8598 } 8599 } 8600 8601 /* check if RAZWI happened */ 8602 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask); 8603 hl_check_for_glbl_errors(hdev); 8604 8605 /* Write 1 clear errors */ 8606 WREG32(sts_addr, sts_clr_val); 8607 8608 return error_count; 8609 } 8610 8611 static int 
gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type, 8612 u64 *event_mask) 8613 { 8614 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0; 8615 int i; 8616 8617 sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index; 8618 sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index; 8619 8620 sts_val = RREG32(sts_addr); 8621 8622 for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) { 8623 if (sts_val & BIT(i)) { 8624 gaudi2_print_event(hdev, event_type, true, 8625 "err cause: %s", guadi2_mme_error_cause[i]); 8626 sts_clr_val |= BIT(i); 8627 error_count++; 8628 } 8629 } 8630 8631 /* check if RAZWI happened */ 8632 for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++) 8633 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask); 8634 8635 hl_check_for_glbl_errors(hdev); 8636 8637 WREG32(sts_clr_addr, sts_clr_val); 8638 8639 return error_count; 8640 } 8641 8642 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type, 8643 u64 intr_cause_data) 8644 { 8645 int i, error_count = 0; 8646 8647 for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++) 8648 if (intr_cause_data & BIT(i)) { 8649 gaudi2_print_event(hdev, event_type, true, 8650 "err cause: %s", guadi2_mme_sbte_error_cause[i]); 8651 error_count++; 8652 } 8653 8654 hl_check_for_glbl_errors(hdev); 8655 8656 return error_count; 8657 } 8658 8659 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type, 8660 u64 *event_mask) 8661 { 8662 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0; 8663 int i; 8664 8665 sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index; 8666 sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index; 8667 8668 sts_val = RREG32(sts_addr); 8669 8670 for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) { 8671 if (sts_val & BIT(i)) { 8672 gaudi2_print_event(hdev, event_type, true, 8673 "err cause: %s", guadi2_mme_wap_error_cause[i]); 8674 sts_clr_val |= BIT(i); 8675 error_count++; 8676 } 8677 } 8678 8679 /* check if RAZWI happened on WAP0/1 */ 8680 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask); 8681 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask); 8682 hl_check_for_glbl_errors(hdev); 8683 8684 WREG32(sts_clr_addr, sts_clr_val); 8685 8686 return error_count; 8687 } 8688 8689 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type, 8690 u64 intr_cause_data) 8691 { 8692 u32 error_count = 0; 8693 int i; 8694 8695 /* If an AXI read or write error is received, an error is reported and 8696 * interrupt message is sent. Due to an HW errata, when reading the cause 8697 * register of the KDMA engine, the reported error is always HBW even if 8698 * the actual error caused by a LBW KDMA transaction. 
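* Consequently, the cause string printed below may name an HBW error even when the offending KDMA transaction was in fact LBW.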
8699 */ 8700 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) 8701 if (intr_cause_data & BIT(i)) { 8702 gaudi2_print_event(hdev, event_type, true, 8703 "err cause: %s", gaudi2_kdma_core_interrupts_cause[i]); 8704 error_count++; 8705 } 8706 8707 hl_check_for_glbl_errors(hdev); 8708 8709 return error_count; 8710 } 8711 8712 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, int sts_addr) 8713 { 8714 u32 error_count = 0, sts_val = RREG32(sts_addr); 8715 int i; 8716 8717 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++) 8718 if (sts_val & BIT(i)) { 8719 gaudi2_print_event(hdev, event_type, true, 8720 "err cause: %s", gaudi2_dma_core_interrupts_cause[i]); 8721 error_count++; 8722 } 8723 8724 hl_check_for_glbl_errors(hdev); 8725 8726 return error_count; 8727 } 8728 8729 static int gaudi2_handle_pdma_core_event(struct hl_device *hdev, u16 event_type, int pdma_idx) 8730 { 8731 u32 sts_addr; 8732 8733 sts_addr = mmPDMA0_CORE_ERR_CAUSE + pdma_idx * PDMA_OFFSET; 8734 return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr); 8735 } 8736 8737 static int gaudi2_handle_edma_core_event(struct hl_device *hdev, u16 event_type, int edma_idx) 8738 { 8739 static const int edma_event_index_map[] = {2, 3, 0, 1, 6, 7, 4, 5}; 8740 u32 sts_addr, index; 8741 8742 index = edma_event_index_map[edma_idx]; 8743 8744 sts_addr = mmDCORE0_EDMA0_CORE_ERR_CAUSE + 8745 DCORE_OFFSET * (index / NUM_OF_EDMA_PER_DCORE) + 8746 DCORE_EDMA_OFFSET * (index % NUM_OF_EDMA_PER_DCORE); 8747 return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr); 8748 } 8749 8750 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask) 8751 { 8752 u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr; 8753 8754 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED; 8755 if (RREG32(razwi_happened_addr)) { 8756 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", 8757 GAUDI2_ENGINE_ID_PCIE, event_mask); 8758 WREG32(razwi_happened_addr, 0x1); 8759 } 8760 8761 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED; 8762 if (RREG32(razwi_happened_addr)) { 8763 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", 8764 GAUDI2_ENGINE_ID_PCIE, event_mask); 8765 WREG32(razwi_happened_addr, 0x1); 8766 } 8767 8768 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED; 8769 if (RREG32(razwi_happened_addr)) { 8770 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", 8771 GAUDI2_ENGINE_ID_PCIE, event_mask); 8772 WREG32(razwi_happened_addr, 0x1); 8773 } 8774 8775 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED; 8776 if (RREG32(razwi_happened_addr)) { 8777 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", 8778 GAUDI2_ENGINE_ID_PCIE, event_mask); 8779 WREG32(razwi_happened_addr, 0x1); 8780 } 8781 } 8782 8783 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type, 8784 u64 intr_cause_data, u64 *event_mask) 8785 { 8786 u32 error_count = 0; 8787 int i; 8788 8789 for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) { 8790 if (!(intr_cause_data & BIT_ULL(i))) 8791 continue; 8792 8793 gaudi2_print_event(hdev, event_type, true, 8794 "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]); 8795 error_count++; 8796 8797 switch (intr_cause_data & BIT_ULL(i)) { 8798 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: 8799 
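/* AXI LBW error indication inside the PCIe wrapper - check the global error indications */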
hl_check_for_glbl_errors(hdev); 8800 break; 8801 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: 8802 gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); 8803 break; 8804 } 8805 } 8806 8807 return error_count; 8808 } 8809 8810 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type, 8811 u64 intr_cause_data) 8812 8813 { 8814 u32 error_count = 0; 8815 int i; 8816 8817 for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) { 8818 if (intr_cause_data & BIT_ULL(i)) { 8819 gaudi2_print_event(hdev, event_type, true, 8820 "err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]); 8821 error_count++; 8822 } 8823 } 8824 8825 return error_count; 8826 } 8827 8828 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data) 8829 { 8830 u32 error_count = 0; 8831 int i; 8832 8833 for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) { 8834 if (intr_cause_data & BIT_ULL(i)) { 8835 gaudi2_print_event(hdev, event_type, true, 8836 "err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]); 8837 error_count++; 8838 } 8839 } 8840 8841 return error_count; 8842 } 8843 8844 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu, 8845 u64 *event_mask) 8846 { 8847 u32 valid, val, axid_l, axid_h; 8848 u64 addr; 8849 8850 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8851 8852 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK)) 8853 return; 8854 8855 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE)); 8856 addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK; 8857 addr <<= 32; 8858 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA)); 8859 8860 axid_l = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_LSB)); 8861 axid_h = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_MSB)); 8862 8863 dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx, transaction id 0x%llX\n", 8864 is_pmmu ? "PMMU" : "HMMU", addr, ((u64)axid_h << 32) + axid_l); 8865 hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask); 8866 8867 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0); 8868 } 8869 8870 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu) 8871 { 8872 u32 valid, val; 8873 u64 addr; 8874 8875 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID)); 8876 8877 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK)) 8878 return; 8879 8880 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE)); 8881 addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK; 8882 addr <<= 32; 8883 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA)); 8884 8885 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n", 8886 is_pmmu ? 
"PMMU" : "HMMU", addr); 8887 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0); 8888 } 8889 8890 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type, 8891 u64 mmu_base, bool is_pmmu, u64 *event_mask) 8892 { 8893 u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0; 8894 int i; 8895 8896 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET); 8897 8898 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) { 8899 if (spi_sei_cause & BIT(i)) { 8900 gaudi2_print_event(hdev, event_type, true, 8901 "err cause: %s", gaudi2_mmu_spi_sei[i].cause); 8902 8903 if (i == 0) 8904 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask); 8905 else if (i == 1) 8906 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu); 8907 8908 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0) 8909 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit); 8910 8911 error_count++; 8912 } 8913 } 8914 8915 /* Clear cause */ 8916 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause); 8917 8918 /* Clear interrupt */ 8919 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr); 8920 8921 return error_count; 8922 } 8923 8924 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index) 8925 { 8926 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log, 8927 cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0; 8928 int i; 8929 8930 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index; 8931 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index; 8932 8933 sei_cause_val = RREG32(sei_cause_addr); 8934 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val); 8935 cq_intr_val = RREG32(cq_intr_addr); 8936 8937 /* SEI interrupt */ 8938 if (sei_cause_cause) { 8939 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */ 8940 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK, 8941 sei_cause_val); 8942 8943 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) { 8944 if (!(sei_cause_cause & BIT(i))) 8945 continue; 8946 8947 gaudi2_print_event(hdev, event_type, true, 8948 "err cause: %s. %s: 0x%X", 8949 gaudi2_sm_sei_cause[i].cause_name, 8950 gaudi2_sm_sei_cause[i].log_name, 8951 sei_cause_log); 8952 error_count++; 8953 break; 8954 } 8955 8956 /* Clear SM_SEI_CAUSE */ 8957 WREG32(sei_cause_addr, 0); 8958 } 8959 8960 /* CQ interrupt */ 8961 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) { 8962 cq_intr_queue_index = 8963 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK, 8964 cq_intr_val); 8965 8966 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n", 8967 sm_index, cq_intr_queue_index); 8968 error_count++; 8969 8970 /* Clear CQ_INTR */ 8971 WREG32(cq_intr_addr, 0); 8972 } 8973 8974 hl_check_for_glbl_errors(hdev); 8975 8976 return error_count; 8977 } 8978 8979 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 8980 { 8981 bool is_pmmu = false; 8982 u32 error_count = 0; 8983 u64 mmu_base; 8984 u8 index; 8985 8986 switch (event_type) { 8987 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR: 8988 index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3; 8989 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8990 break; 8991 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... 
GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP: 8992 index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP); 8993 mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8994 break; 8995 case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR: 8996 index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3; 8997 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 8998 break; 8999 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP: 9000 index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP); 9001 mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 9002 break; 9003 case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR: 9004 index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3; 9005 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 9006 break; 9007 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP: 9008 index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP); 9009 mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 9010 break; 9011 case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 9012 index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3; 9013 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 9014 break; 9015 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 9016 index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP); 9017 mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET; 9018 break; 9019 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 9020 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 9021 is_pmmu = true; 9022 mmu_base = mmPMMU_HBW_MMU_BASE; 9023 break; 9024 default: 9025 return 0; 9026 } 9027 9028 error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base, 9029 is_pmmu, event_mask); 9030 hl_check_for_glbl_errors(hdev); 9031 9032 return error_count; 9033 } 9034 9035 9036 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */ 9037 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev, 9038 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt) 9039 { 9040 u32 addr, beat, beat_shift; 9041 bool rc = false; 9042 9043 dev_err_ratelimited(hdev->dev, 9044 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n", 9045 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt), 9046 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt), 9047 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt)); 9048 9049 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val); 9050 dev_err_ratelimited(hdev->dev, 9051 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n", 9052 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr), 9053 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr), 9054 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr), 9055 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr), 9056 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr)); 9057 9058 /* For each beat (RDQS edge), look for possible errors and print relevant info */ 9059 for (beat = 0 ; beat < 4 ; beat++) { 9060 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 9061 (HBM_RD_ERR_SERR_BEAT0_MASK << beat)) 9062 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n", 9063 beat, 9064 le32_to_cpu(rd_err_data->dbg_rd_err_dm), 9065 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome)); 9066 9067 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 9068 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) { 9069 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, 
Syndrome: %#x\n", 9070 beat, 9071 le32_to_cpu(rd_err_data->dbg_rd_err_dm), 9072 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome)); 9073 rc |= true; 9074 } 9075 9076 beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT; 9077 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 9078 (HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) { 9079 dev_err_ratelimited(hdev->dev, 9080 "Beat%d read PARITY: DM: %#x, PAR data: %#x\n", 9081 beat, 9082 le32_to_cpu(rd_err_data->dbg_rd_err_dm), 9083 (le32_to_cpu(rd_err_data->dbg_rd_err_misc) & 9084 (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >> 9085 (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift)); 9086 rc |= true; 9087 } 9088 9089 dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat); 9090 dev_err_ratelimited(hdev->dev, "\t0x%08x\n", 9091 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2])); 9092 dev_err_ratelimited(hdev->dev, "\t0x%08x\n", 9093 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1])); 9094 } 9095 9096 return rc; 9097 } 9098 9099 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev, 9100 struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt) 9101 { 9102 struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds; 9103 u32 i, curr_addr, derr = wr_par_err_data->dbg_derr; 9104 9105 dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt); 9106 9107 dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n", 9108 derr & 0x3, derr & 0xc); 9109 9110 /* JIRA H6-3286 - the following prints may not be valid */ 9111 dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n"); 9112 for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) { 9113 curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr); 9114 dev_err_ratelimited(hdev->dev, 9115 "\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n", 9116 i, 9117 FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr), 9118 FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr), 9119 FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr), 9120 FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr)); 9121 } 9122 } 9123 9124 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev, 9125 struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt) 9126 { 9127 __le32 *col_cmd = ca_par_err_data->dbg_col; 9128 __le16 *row_cmd = ca_par_err_data->dbg_row; 9129 u32 i; 9130 9131 dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt); 9132 9133 dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n"); 9134 for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++) 9135 dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i, 9136 le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0), 9137 le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0)); 9138 } 9139 9140 /* Returns true if hard reset is needed or false otherwise */ 9141 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type, 9142 struct hl_eq_hbm_sei_data *sei_data) 9143 { 9144 bool require_hard_reset = false; 9145 u32 hbm_id, mc_id, cause_idx; 9146 9147 hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4; 9148 mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2; 9149 9150 cause_idx = sei_data->hdr.sei_cause; 9151 if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) { 9152 gaudi2_print_event(hdev, event_type, true, 9153 "err cause: Invalid HBM SEI event cause (%d) provided by FW", 9154 cause_idx); 9155 return true; 9156 } 9157 9158 gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical, 9159 "System %s Error Interrupt - HBM(%u) MC(%u)
MC_CH(%u) MC_PC(%u). Error cause: %s", 9160 sei_data->hdr.is_critical ? "Critical" : "Non-critical", 9161 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, 9162 hbm_mc_sei_cause[cause_idx]); 9163 9164 /* Print error-specific info */ 9165 switch (cause_idx) { 9166 case HBM_SEI_CATTRIP: 9167 require_hard_reset = true; 9168 break; 9169 9170 case HBM_SEI_CMD_PARITY_EVEN: 9171 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info, 9172 le32_to_cpu(sei_data->hdr.cnt)); 9173 require_hard_reset = true; 9174 break; 9175 9176 case HBM_SEI_CMD_PARITY_ODD: 9177 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info, 9178 le32_to_cpu(sei_data->hdr.cnt)); 9179 require_hard_reset = true; 9180 break; 9181 9182 case HBM_SEI_WRITE_DATA_PARITY_ERR: 9183 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info, 9184 le32_to_cpu(sei_data->hdr.cnt)); 9185 require_hard_reset = true; 9186 break; 9187 9188 case HBM_SEI_READ_ERR: 9189 /* Unlike other SEI events, read error requires further processing of the 9190 * raw data in order to determine the root cause. 9191 */ 9192 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev, 9193 &sei_data->read_err_info, 9194 le32_to_cpu(sei_data->hdr.cnt)); 9195 break; 9196 9197 default: 9198 break; 9199 } 9200 9201 require_hard_reset |= !!sei_data->hdr.is_critical; 9202 9203 return require_hard_reset; 9204 } 9205 9206 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type, 9207 u64 intr_cause_data) 9208 { 9209 if (intr_cause_data) { 9210 gaudi2_print_event(hdev, event_type, true, 9211 "temperature error cause: %#llx", intr_cause_data); 9212 return 1; 9213 } 9214 9215 return 0; 9216 } 9217 9218 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data) 9219 { 9220 u32 i, error_count = 0; 9221 9222 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++) 9223 if (intr_cause_data & hbm_mc_spi[i].mask) { 9224 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n", 9225 hbm_mc_spi[i].cause); 9226 error_count++; 9227 } 9228 9229 return error_count; 9230 } 9231 9232 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 9233 { 9234 ktime_t zero_time = ktime_set(0, 0); 9235 9236 mutex_lock(&hdev->clk_throttling.lock); 9237 9238 switch (event_type) { 9239 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 9240 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 9241 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 9242 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 9243 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 9244 dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); 9245 break; 9246 9247 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 9248 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 9249 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 9250 dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); 9251 break; 9252 9253 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 9254 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 9255 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 9256 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 9257 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 9258 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9259 dev_info_ratelimited(hdev->dev, "Clock 
throttling due to overheating\n"); 9260 break; 9261 9262 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 9263 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 9264 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 9265 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9266 dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); 9267 break; 9268 9269 default: 9270 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type); 9271 break; 9272 } 9273 9274 mutex_unlock(&hdev->clk_throttling.lock); 9275 } 9276 9277 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type, 9278 struct cpucp_pkt_sync_err *sync_err) 9279 { 9280 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 9281 9282 gaudi2_print_event(hdev, event_type, false, 9283 "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d", 9284 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), 9285 q->pi, atomic_read(&q->ci)); 9286 } 9287 9288 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type) 9289 { 9290 u32 p2p_intr, msix_gw_intr, error_count = 0; 9291 9292 p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR); 9293 msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR); 9294 9295 if (p2p_intr) { 9296 gaudi2_print_event(hdev, event_type, true, 9297 "pcie p2p transaction terminated due to security, req_id(0x%x)", 9298 RREG32(mmPCIE_WRAP_P2P_REQ_ID)); 9299 9300 WREG32(mmPCIE_WRAP_P2P_INTR, 0x1); 9301 error_count++; 9302 } 9303 9304 if (msix_gw_intr) { 9305 gaudi2_print_event(hdev, event_type, true, 9306 "pcie msi-x gen denied due to vector num check failure, vec(0x%X)", 9307 RREG32(mmPCIE_WRAP_MSIX_GW_VEC)); 9308 9309 WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1); 9310 error_count++; 9311 } 9312 9313 return error_count; 9314 } 9315 9316 static int gaudi2_handle_pcie_drain(struct hl_device *hdev, 9317 struct hl_eq_pcie_drain_ind_data *drain_data) 9318 { 9319 u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0; 9320 9321 cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data); 9322 lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw); 9323 lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw); 9324 hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw); 9325 hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw); 9326 9327 if (cause & BIT_ULL(0)) { 9328 dev_err_ratelimited(hdev->dev, 9329 "PCIE AXI drain LBW completed, read_err %u, write_err %u\n", 9330 !!lbw_rd, !!lbw_wr); 9331 error_count++; 9332 } 9333 9334 if (cause & BIT_ULL(1)) { 9335 dev_err_ratelimited(hdev->dev, 9336 "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n", 9337 hbw_rd, hbw_wr); 9338 error_count++; 9339 } 9340 9341 return error_count; 9342 } 9343 9344 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data) 9345 { 9346 u32 error_count = 0; 9347 int i; 9348 9349 for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) { 9350 if (intr_cause_data & BIT_ULL(i)) { 9351 dev_err_ratelimited(hdev->dev, "PSOC %s completed\n", 9352 gaudi2_psoc_axi_drain_interrupts_cause[i]); 9353 error_count++; 9354 } 9355 } 9356 9357 hl_check_for_glbl_errors(hdev); 9358 9359 return error_count; 9360 } 9361 9362 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type, 9363 struct cpucp_pkt_sync_err *sync_err) 9364 { 9365 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ]; 9366 9367 gaudi2_print_event(hdev, event_type, false, 9368 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d", 9369 
le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 9370 } 9371 9372 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type, 9373 struct hl_eq_engine_arc_intr_data *data) 9374 { 9375 struct hl_engine_arc_dccm_queue_full_irq *q; 9376 u32 intr_type, engine_id; 9377 u64 payload; 9378 9379 intr_type = le32_to_cpu(data->intr_type); 9380 engine_id = le32_to_cpu(data->engine_id); 9381 payload = le64_to_cpu(data->payload); 9382 9383 switch (intr_type) { 9384 case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ: 9385 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload; 9386 9387 gaudi2_print_event(hdev, event_type, true, 9388 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u", 9389 engine_id, intr_type, q->queue_index); 9390 return 1; 9391 default: 9392 gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type"); 9393 return 0; 9394 } 9395 } 9396 9397 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 9398 { 9399 struct gaudi2_device *gaudi2 = hdev->asic_specific; 9400 bool reset_required = false, is_critical = false; 9401 u32 index, ctl, reset_flags = 0, error_count = 0; 9402 u64 event_mask = 0; 9403 u16 event_type; 9404 9405 ctl = le32_to_cpu(eq_entry->hdr.ctl); 9406 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT); 9407 9408 if (event_type >= GAUDI2_EVENT_SIZE) { 9409 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 9410 event_type, GAUDI2_EVENT_SIZE - 1); 9411 return; 9412 } 9413 9414 gaudi2->events_stat[event_type]++; 9415 gaudi2->events_stat_aggregate[event_type]++; 9416 9417 switch (event_type) { 9418 case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR: 9419 fallthrough; 9420 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR: 9421 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9422 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9423 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 9424 is_critical = eq_entry->ecc_data.is_critical; 9425 error_count++; 9426 break; 9427 9428 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM: 9429 fallthrough; 9430 case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM: 9431 fallthrough; 9432 case GAUDI2_EVENT_NIC0_QM0 ... 
GAUDI2_EVENT_NIC11_QM1: 9433 error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask); 9434 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9435 break; 9436 9437 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0: 9438 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9439 error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type); 9440 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9441 break; 9442 9443 case GAUDI2_EVENT_CPU_AXI_ERR_RSP: 9444 error_count = gaudi2_handle_cpu_sei_err(hdev, event_type); 9445 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9446 break; 9447 9448 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 9449 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 9450 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9451 error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask); 9452 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9453 break; 9454 9455 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: 9456 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: 9457 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 9458 error_count = gaudi2_handle_rot_err(hdev, index, event_type, 9459 &eq_entry->razwi_with_intr_cause, &event_mask); 9460 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask); 9461 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9462 break; 9463 9464 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: 9465 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; 9466 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type, 9467 &eq_entry->razwi_with_intr_cause, &event_mask); 9468 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask); 9469 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9470 break; 9471 9472 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE: 9473 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE; 9474 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask); 9475 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9476 break; 9477 9478 case GAUDI2_EVENT_TPC0_KERNEL_ERR: 9479 case GAUDI2_EVENT_TPC1_KERNEL_ERR: 9480 case GAUDI2_EVENT_TPC2_KERNEL_ERR: 9481 case GAUDI2_EVENT_TPC3_KERNEL_ERR: 9482 case GAUDI2_EVENT_TPC4_KERNEL_ERR: 9483 case GAUDI2_EVENT_TPC5_KERNEL_ERR: 9484 case GAUDI2_EVENT_TPC6_KERNEL_ERR: 9485 case GAUDI2_EVENT_TPC7_KERNEL_ERR: 9486 case GAUDI2_EVENT_TPC8_KERNEL_ERR: 9487 case GAUDI2_EVENT_TPC9_KERNEL_ERR: 9488 case GAUDI2_EVENT_TPC10_KERNEL_ERR: 9489 case GAUDI2_EVENT_TPC11_KERNEL_ERR: 9490 case GAUDI2_EVENT_TPC12_KERNEL_ERR: 9491 case GAUDI2_EVENT_TPC13_KERNEL_ERR: 9492 case GAUDI2_EVENT_TPC14_KERNEL_ERR: 9493 case GAUDI2_EVENT_TPC15_KERNEL_ERR: 9494 case GAUDI2_EVENT_TPC16_KERNEL_ERR: 9495 case GAUDI2_EVENT_TPC17_KERNEL_ERR: 9496 case GAUDI2_EVENT_TPC18_KERNEL_ERR: 9497 case GAUDI2_EVENT_TPC19_KERNEL_ERR: 9498 case GAUDI2_EVENT_TPC20_KERNEL_ERR: 9499 case GAUDI2_EVENT_TPC21_KERNEL_ERR: 9500 case GAUDI2_EVENT_TPC22_KERNEL_ERR: 9501 case GAUDI2_EVENT_TPC23_KERNEL_ERR: 9502 case GAUDI2_EVENT_TPC24_KERNEL_ERR: 9503 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) / 9504 (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR); 9505 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type, 9506 &eq_entry->razwi_with_intr_cause, &event_mask); 9507 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9508 break; 9509 9510 case GAUDI2_EVENT_DEC0_SPI: 9511 case GAUDI2_EVENT_DEC1_SPI: 9512 case GAUDI2_EVENT_DEC2_SPI: 9513 case GAUDI2_EVENT_DEC3_SPI: 9514 case GAUDI2_EVENT_DEC4_SPI: 9515 case GAUDI2_EVENT_DEC5_SPI: 
9516 case GAUDI2_EVENT_DEC6_SPI: 9517 case GAUDI2_EVENT_DEC7_SPI: 9518 case GAUDI2_EVENT_DEC8_SPI: 9519 case GAUDI2_EVENT_DEC9_SPI: 9520 index = (event_type - GAUDI2_EVENT_DEC0_SPI) / 9521 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI); 9522 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask); 9523 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9524 break; 9525 9526 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: 9527 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: 9528 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: 9529 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: 9530 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) / 9531 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE - 9532 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE); 9533 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask); 9534 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask); 9535 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9536 break; 9537 9538 case GAUDI2_EVENT_MME0_QMAN_SW_ERROR: 9539 case GAUDI2_EVENT_MME1_QMAN_SW_ERROR: 9540 case GAUDI2_EVENT_MME2_QMAN_SW_ERROR: 9541 case GAUDI2_EVENT_MME3_QMAN_SW_ERROR: 9542 index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) / 9543 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR - 9544 GAUDI2_EVENT_MME0_QMAN_SW_ERROR); 9545 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask); 9546 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9547 break; 9548 9549 case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID: 9550 case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID: 9551 case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID: 9552 case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID: 9553 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) / 9554 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID - 9555 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID); 9556 error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask); 9557 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9558 break; 9559 9560 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: 9561 case GAUDI2_EVENT_KDMA0_CORE: 9562 error_count = gaudi2_handle_kdma_core_event(hdev, event_type, 9563 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9564 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9565 break; 9566 9567 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE: 9568 index = event_type - GAUDI2_EVENT_HDMA2_CORE; 9569 error_count = gaudi2_handle_edma_core_event(hdev, event_type, index); 9570 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9571 break; 9572 9573 case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE: 9574 index = event_type - GAUDI2_EVENT_PDMA0_CORE; 9575 error_count = gaudi2_handle_pdma_core_event(hdev, event_type, index); 9576 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9577 break; 9578 9579 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR: 9580 error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type, 9581 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask); 9582 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9583 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9584 break; 9585 9586 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: 9587 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: 9588 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... 
GAUDI2_EVENT_PMMU0_SECURITY_ERROR: 9589 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 9590 error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask); 9591 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9592 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9593 break; 9594 9595 case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL: 9596 error_count = gaudi2_handle_hif_fatal(hdev, event_type, 9597 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9598 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9599 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9600 break; 9601 9602 case GAUDI2_EVENT_PMMU_FATAL_0: 9603 error_count = gaudi2_handle_pif_fatal(hdev, event_type, 9604 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9605 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9606 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9607 break; 9608 9609 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT: 9610 error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask); 9611 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9612 break; 9613 9614 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE: 9615 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9616 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { 9617 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9618 reset_required = true; 9619 } 9620 error_count++; 9621 break; 9622 9623 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5: 9624 error_count = gaudi2_handle_hbm_cattrip(hdev, event_type, 9625 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9626 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9627 break; 9628 9629 case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI: 9630 error_count = gaudi2_handle_hbm_mc_spi(hdev, 9631 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9632 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9633 break; 9634 9635 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE: 9636 error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); 9637 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9638 break; 9639 9640 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: 9641 error_count = gaudi2_handle_psoc_drain(hdev, 9642 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9643 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9644 break; 9645 9646 case GAUDI2_EVENT_CPU_AXI_ECC: 9647 error_count = GAUDI2_NA_EVENT_CAUSE; 9648 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9649 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9650 break; 9651 case GAUDI2_EVENT_CPU_L2_RAM_ECC: 9652 error_count = GAUDI2_NA_EVENT_CAUSE; 9653 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9654 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9655 break; 9656 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP: 9657 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: 9658 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP: 9659 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP: 9660 error_count = gaudi2_handle_mme_sbte_err(hdev, event_type, 9661 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 9662 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9663 break; 9664 case GAUDI2_EVENT_VM0_ALARM_A ... 
GAUDI2_EVENT_VM3_ALARM_B: 9665 error_count = GAUDI2_NA_EVENT_CAUSE; 9666 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9667 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9668 break; 9669 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP: 9670 error_count = GAUDI2_NA_EVENT_CAUSE; 9671 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9672 break; 9673 case GAUDI2_EVENT_PSOC_PRSTN_FALL: 9674 error_count = GAUDI2_NA_EVENT_CAUSE; 9675 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9676 break; 9677 case GAUDI2_EVENT_PCIE_APB_TIMEOUT: 9678 error_count = GAUDI2_NA_EVENT_CAUSE; 9679 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 9680 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9681 break; 9682 case GAUDI2_EVENT_PCIE_FATAL_ERR: 9683 error_count = GAUDI2_NA_EVENT_CAUSE; 9684 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9685 break; 9686 case GAUDI2_EVENT_TPC0_BMON_SPMU: 9687 case GAUDI2_EVENT_TPC1_BMON_SPMU: 9688 case GAUDI2_EVENT_TPC2_BMON_SPMU: 9689 case GAUDI2_EVENT_TPC3_BMON_SPMU: 9690 case GAUDI2_EVENT_TPC4_BMON_SPMU: 9691 case GAUDI2_EVENT_TPC5_BMON_SPMU: 9692 case GAUDI2_EVENT_TPC6_BMON_SPMU: 9693 case GAUDI2_EVENT_TPC7_BMON_SPMU: 9694 case GAUDI2_EVENT_TPC8_BMON_SPMU: 9695 case GAUDI2_EVENT_TPC9_BMON_SPMU: 9696 case GAUDI2_EVENT_TPC10_BMON_SPMU: 9697 case GAUDI2_EVENT_TPC11_BMON_SPMU: 9698 case GAUDI2_EVENT_TPC12_BMON_SPMU: 9699 case GAUDI2_EVENT_TPC13_BMON_SPMU: 9700 case GAUDI2_EVENT_TPC14_BMON_SPMU: 9701 case GAUDI2_EVENT_TPC15_BMON_SPMU: 9702 case GAUDI2_EVENT_TPC16_BMON_SPMU: 9703 case GAUDI2_EVENT_TPC17_BMON_SPMU: 9704 case GAUDI2_EVENT_TPC18_BMON_SPMU: 9705 case GAUDI2_EVENT_TPC19_BMON_SPMU: 9706 case GAUDI2_EVENT_TPC20_BMON_SPMU: 9707 case GAUDI2_EVENT_TPC21_BMON_SPMU: 9708 case GAUDI2_EVENT_TPC22_BMON_SPMU: 9709 case GAUDI2_EVENT_TPC23_BMON_SPMU: 9710 case GAUDI2_EVENT_TPC24_BMON_SPMU: 9711 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU: 9712 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU: 9713 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU: 9714 case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU: 9715 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU: 9716 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU: 9717 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU: 9718 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU: 9719 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU: 9720 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU: 9721 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU: 9722 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU: 9723 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU: 9724 fallthrough; 9725 case GAUDI2_EVENT_DEC0_BMON_SPMU: 9726 case GAUDI2_EVENT_DEC1_BMON_SPMU: 9727 case GAUDI2_EVENT_DEC2_BMON_SPMU: 9728 case GAUDI2_EVENT_DEC3_BMON_SPMU: 9729 case GAUDI2_EVENT_DEC4_BMON_SPMU: 9730 case GAUDI2_EVENT_DEC5_BMON_SPMU: 9731 case GAUDI2_EVENT_DEC6_BMON_SPMU: 9732 case GAUDI2_EVENT_DEC7_BMON_SPMU: 9733 case GAUDI2_EVENT_DEC8_BMON_SPMU: 9734 case GAUDI2_EVENT_DEC9_BMON_SPMU: 9735 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... 
GAUDI2_EVENT_SM3_BMON_SPMU: 9736 error_count = GAUDI2_NA_EVENT_CAUSE; 9737 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9738 break; 9739 9740 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: 9741 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E: 9742 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 9743 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 9744 gaudi2_print_clk_change_info(hdev, event_type, &event_mask); 9745 error_count = GAUDI2_NA_EVENT_CAUSE; 9746 break; 9747 9748 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC: 9749 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err); 9750 error_count = GAUDI2_NA_EVENT_CAUSE; 9751 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9752 break; 9753 9754 case GAUDI2_EVENT_PCIE_FLR_REQUESTED: 9755 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9756 error_count = GAUDI2_NA_EVENT_CAUSE; 9757 /* Do nothing- FW will handle it */ 9758 break; 9759 9760 case GAUDI2_EVENT_PCIE_P2P_MSIX: 9761 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type); 9762 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9763 break; 9764 9765 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE: 9766 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE; 9767 error_count = gaudi2_handle_sm_err(hdev, event_type, index); 9768 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9769 break; 9770 9771 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR: 9772 error_count = GAUDI2_NA_EVENT_CAUSE; 9773 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9774 break; 9775 9776 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: 9777 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n", 9778 le64_to_cpu(eq_entry->data[0])); 9779 error_count = GAUDI2_NA_EVENT_CAUSE; 9780 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9781 break; 9782 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT: 9783 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n", 9784 le64_to_cpu(eq_entry->data[0])); 9785 error_count = GAUDI2_NA_EVENT_CAUSE; 9786 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9787 break; 9788 9789 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED: 9790 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err); 9791 error_count = GAUDI2_NA_EVENT_CAUSE; 9792 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9793 break; 9794 9795 case GAUDI2_EVENT_ARC_DCCM_FULL: 9796 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data); 9797 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 9798 break; 9799 9800 case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED: 9801 case GAUDI2_EVENT_CPU_DEV_RESET_REQ: 9802 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 9803 error_count = GAUDI2_NA_EVENT_CAUSE; 9804 is_critical = true; 9805 break; 9806 9807 default: 9808 if (gaudi2_irq_map_table[event_type].valid) { 9809 dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n", 9810 event_type); 9811 error_count = GAUDI2_NA_EVENT_CAUSE; 9812 } 9813 } 9814 9815 /* Make sure to dump an error in case no error cause was printed so far. 9816 * Note that although we have counted the errors, we use this number as 9817 * a boolean. 
 */
	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
	else if (error_count == 0)
		gaudi2_print_event(hdev, event_type, true,
				"No error cause for H/W event %u", event_type);

	if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
				reset_required) {
		if (reset_required ||
				(gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
			reset_flags |= HL_DRV_RESET_HARD;

		if (hdev->hard_reset_on_fw_events ||
				(hdev->asic_prop.fw_security_enabled && is_critical))
			goto reset_device;
	}

	/* Send unmask irq only for interrupts not classified as MSG */
	if (!gaudi2_irq_map_table[event_type].msg)
		hl_fw_unmask_irq(hdev, event_type);

	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);

	return;

reset_device:
	if (hdev->asic_prop.fw_security_enabled && is_critical) {
		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
	} else {
		reset_flags |= HL_DRV_RESET_DELAY;
	}
	/* escalate general hw errors to critical/fatal error */
	if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
		hl_handle_critical_hw_err(hdev, event_type, &event_mask);

	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
	hl_device_cond_reset(hdev, reset_flags, event_mask);
}

static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
			struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
			u32 hw_queue_id, u32 size, u64 addr, u32 val)
{
	u32 ctl, pkt_size;
	int rc = 0;

	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	pkt_size = sizeof(struct packet_lin_dma);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
	if (rc)
		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
				hw_queue_id);

	return rc;
}

static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
{
	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
				GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	void *lin_dma_pkts_arr;
	dma_addr_t pkt_dma_addr;
	int rc = 0, dma_num = 0;

	if (prop->edma_enabled_mask == 0) {
		dev_info(hdev->dev, "none of the EDMA engines is enabled - skip DRAM scrubbing\n");
		return -EIO;
	}

	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	comp_addr = CFG_BASE + sob_addr;
	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
	mmubp =
FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) | 9912 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1); 9913 9914 /* Calculate how many lin dma pkts we'll need */ 9915 num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G); 9916 pkt_size = sizeof(struct packet_lin_dma); 9917 9918 lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts, 9919 &pkt_dma_addr, GFP_KERNEL); 9920 if (!lin_dma_pkts_arr) 9921 return -ENOMEM; 9922 9923 /* 9924 * set mmu bypass for the scrubbing - all ddmas are configured the same so save 9925 * only the first one to restore later 9926 * also set the sob addr for all edma cores for completion. 9927 * set QM as trusted to allow it to access physical address with MMU bp. 9928 */ 9929 old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP); 9930 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9931 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9932 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 9933 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9934 9935 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9936 continue; 9937 9938 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + 9939 edma_offset, mmubp); 9940 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 9941 lower_32_bits(comp_addr)); 9942 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 9943 upper_32_bits(comp_addr)); 9944 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 9945 comp_val); 9946 gaudi2_qman_set_test_mode(hdev, 9947 edma_queues_id[dcore] + 4 * edma_idx, true); 9948 } 9949 } 9950 9951 WREG32(sob_addr, 0); 9952 9953 while (cur_addr < end_addr) { 9954 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9955 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9956 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9957 9958 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9959 continue; 9960 9961 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr); 9962 9963 rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev, 9964 (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num, 9965 pkt_dma_addr + dma_num * pkt_size, 9966 edma_queues_id[dcore] + edma_idx * 4, 9967 chunk_size, cur_addr, val); 9968 if (rc) 9969 goto end; 9970 9971 dma_num++; 9972 cur_addr += chunk_size; 9973 if (cur_addr == end_addr) 9974 break; 9975 } 9976 } 9977 } 9978 9979 rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000); 9980 if (rc) { 9981 dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n"); 9982 goto end; 9983 } 9984 end: 9985 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) { 9986 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) { 9987 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET; 9988 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx; 9989 9990 if (!(prop->edma_enabled_mask & BIT(edma_bit))) 9991 continue; 9992 9993 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp); 9994 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0); 9995 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0); 9996 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0); 9997 gaudi2_qman_set_test_mode(hdev, 9998 edma_queues_id[dcore] + 4 * edma_idx, false); 9999 } 10000 } 10001 10002 WREG32(sob_addr, 0); 10003 hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr); 10004 10005 return rc; 10006 } 10007 10008 static int gaudi2_scrub_device_dram(struct 
hl_device *hdev, u64 val) 10009 { 10010 int rc; 10011 struct asic_fixed_properties *prop = &hdev->asic_prop; 10012 u64 size = prop->dram_end_address - prop->dram_user_base_address; 10013 10014 rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val); 10015 10016 if (rc) 10017 dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n", 10018 prop->dram_user_base_address, size); 10019 return rc; 10020 } 10021 10022 static int gaudi2_scrub_device_mem(struct hl_device *hdev) 10023 { 10024 int rc; 10025 struct asic_fixed_properties *prop = &hdev->asic_prop; 10026 u64 val = hdev->memory_scrub_val; 10027 u64 addr, size; 10028 10029 if (!hdev->memory_scrub) 10030 return 0; 10031 10032 /* scrub SRAM */ 10033 addr = prop->sram_user_base_address; 10034 size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET); 10035 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n", 10036 addr, addr + size, val); 10037 rc = gaudi2_memset_device_memory(hdev, addr, size, val); 10038 if (rc) { 10039 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc); 10040 return rc; 10041 } 10042 10043 /* scrub DRAM */ 10044 rc = gaudi2_scrub_device_dram(hdev, val); 10045 if (rc) { 10046 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc); 10047 return rc; 10048 } 10049 return 0; 10050 } 10051 10052 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev) 10053 { 10054 u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr, 10055 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr; 10056 u32 val, size, offset; 10057 int dcore_id; 10058 10059 offset = hdev->asic_prop.first_available_cq[0] * 4; 10060 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset; 10061 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset; 10062 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset; 10063 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset; 10064 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset; 10065 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset; 10066 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - 10067 (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset); 10068 10069 /* memset dcore0 CQ registers */ 10070 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 10071 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 10072 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 10073 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 10074 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 10075 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 10076 10077 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET; 10078 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET; 10079 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET; 10080 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET; 10081 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET; 10082 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET; 10083 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0; 10084 10085 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 10086 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0); 10087 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0); 10088 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0); 10089 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0); 10090 
gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0); 10091 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0); 10092 10093 cq_lbw_l_addr += DCORE_OFFSET; 10094 cq_lbw_h_addr += DCORE_OFFSET; 10095 cq_lbw_data_addr += DCORE_OFFSET; 10096 cq_base_l_addr += DCORE_OFFSET; 10097 cq_base_h_addr += DCORE_OFFSET; 10098 cq_size_addr += DCORE_OFFSET; 10099 } 10100 10101 offset = hdev->asic_prop.first_available_user_mon[0] * 4; 10102 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset; 10103 val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT; 10104 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset); 10105 10106 /* memset dcore0 monitors */ 10107 gaudi2_memset_device_lbw(hdev, addr, size, val); 10108 10109 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset; 10110 gaudi2_memset_device_lbw(hdev, addr, size, 0); 10111 10112 mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET; 10113 mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET; 10114 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0; 10115 10116 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 10117 gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val); 10118 gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0); 10119 mon_sts_addr += DCORE_OFFSET; 10120 mon_cfg_addr += DCORE_OFFSET; 10121 } 10122 10123 offset = hdev->asic_prop.first_available_user_sob[0] * 4; 10124 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset; 10125 val = 0; 10126 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - 10127 (mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset); 10128 10129 /* memset dcore0 sobs */ 10130 gaudi2_memset_device_lbw(hdev, addr, size, val); 10131 10132 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET; 10133 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0; 10134 10135 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) { 10136 gaudi2_memset_device_lbw(hdev, addr, size, val); 10137 addr += DCORE_OFFSET; 10138 } 10139 10140 /* Flush all WREG to prevent race */ 10141 val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset); 10142 } 10143 10144 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev) 10145 { 10146 u32 reg_base, hw_queue_id; 10147 10148 for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0; 10149 hw_queue_id += NUM_OF_PQ_PER_QMAN) { 10150 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id)) 10151 continue; 10152 10153 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false); 10154 10155 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 10156 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0); 10157 } 10158 10159 /* Flush all WREG to prevent race */ 10160 RREG32(mmPDMA0_QM_ARB_CFG_0); 10161 } 10162 10163 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev) 10164 { 10165 u32 reg_base, hw_queue_id; 10166 10167 for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3; 10168 hw_queue_id += NUM_OF_PQ_PER_QMAN) { 10169 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id)) 10170 continue; 10171 10172 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false); 10173 10174 reg_base = gaudi2_qm_blocks_bases[hw_queue_id]; 10175 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0); 10176 } 10177 10178 /* Flush all WREG to prevent race */ 10179 RREG32(mmPDMA0_QM_ARB_CFG_0); 10180 } 10181 10182 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid) 10183 { 10184 return 0; 10185 } 10186 10187 static void 
gaudi2_restore_phase_topology(struct hl_device *hdev) 10188 { 10189 } 10190 10191 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx, 10192 struct dup_block_ctx *cfg_ctx) 10193 { 10194 u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off; 10195 u8 seq; 10196 int i; 10197 10198 for (i = 0 ; i < cfg_ctx->instances ; i++) { 10199 seq = block_idx * cfg_ctx->instances + i; 10200 10201 /* skip disabled instance */ 10202 if (!(cfg_ctx->enabled_mask & BIT_ULL(seq))) 10203 continue; 10204 10205 cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off, 10206 cfg_ctx->data); 10207 } 10208 } 10209 10210 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx, 10211 u64 mask) 10212 { 10213 int i; 10214 10215 cfg_ctx->enabled_mask = mask; 10216 10217 for (i = 0 ; i < cfg_ctx->blocks ; i++) 10218 gaudi2_init_block_instances(hdev, i, cfg_ctx); 10219 } 10220 10221 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx) 10222 { 10223 gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX); 10224 } 10225 10226 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr) 10227 { 10228 void *host_mem_virtual_addr; 10229 dma_addr_t host_mem_dma_addr; 10230 u64 reserved_va_base; 10231 u32 pos, size_left, size_to_dma; 10232 struct hl_ctx *ctx; 10233 int rc = 0; 10234 10235 /* Fetch the ctx */ 10236 ctx = hl_get_compute_ctx(hdev); 10237 if (!ctx) { 10238 dev_err(hdev->dev, "No ctx available\n"); 10239 return -EINVAL; 10240 } 10241 10242 /* Allocate buffers for read and for poll */ 10243 host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr, 10244 GFP_KERNEL | __GFP_ZERO); 10245 if (host_mem_virtual_addr == NULL) { 10246 dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n"); 10247 rc = -ENOMEM; 10248 goto put_ctx; 10249 } 10250 10251 /* Reserve VM region on asic side */ 10252 reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M, 10253 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 10254 if (!reserved_va_base) { 10255 dev_err(hdev->dev, "Failed to reserve vmem on asic\n"); 10256 rc = -ENOMEM; 10257 goto free_data_buffer; 10258 } 10259 10260 /* Create mapping on asic side */ 10261 mutex_lock(&hdev->mmu_lock); 10262 10263 rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M); 10264 if (rc) { 10265 dev_err(hdev->dev, "Failed to create mapping on asic mmu\n"); 10266 goto unreserve_va; 10267 } 10268 10269 rc = hl_mmu_invalidate_cache_range(hdev, false, 10270 MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV, 10271 ctx->asid, reserved_va_base, SZ_2M); 10272 if (rc) { 10273 hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); 10274 goto unreserve_va; 10275 } 10276 10277 mutex_unlock(&hdev->mmu_lock); 10278 10279 /* Enable MMU on KDMA */ 10280 gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid); 10281 10282 pos = 0; 10283 size_left = size; 10284 size_to_dma = SZ_2M; 10285 10286 while (size_left > 0) { 10287 if (size_left < SZ_2M) 10288 size_to_dma = size_left; 10289 10290 rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false); 10291 if (rc) 10292 break; 10293 10294 memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma); 10295 10296 if (size_left <= SZ_2M) 10297 break; 10298 10299 pos += SZ_2M; 10300 addr += SZ_2M; 10301 size_left -= SZ_2M; 10302 } 10303 10304 gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID); 10305 10306 mutex_lock(&hdev->mmu_lock); 10307 10308 rc = 
hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); 10309 if (rc) 10310 goto unreserve_va; 10311 10312 rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR, 10313 ctx->asid, reserved_va_base, SZ_2M); 10314 10315 unreserve_va: 10316 mutex_unlock(&hdev->mmu_lock); 10317 hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M); 10318 free_data_buffer: 10319 hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr); 10320 put_ctx: 10321 hl_ctx_put(ctx); 10322 10323 return rc; 10324 } 10325 10326 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx) 10327 { 10328 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10329 int min_alloc_order, rc; 10330 10331 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) 10332 return 0; 10333 10334 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 10335 HOST_SPACE_INTERNAL_CB_SZ, 10336 &hdev->internal_cb_pool_dma_addr, 10337 GFP_KERNEL | __GFP_ZERO); 10338 10339 if (!hdev->internal_cb_pool_virt_addr) 10340 return -ENOMEM; 10341 10342 min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev), 10343 gaudi2_get_wait_cb_size(hdev))); 10344 10345 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 10346 if (!hdev->internal_cb_pool) { 10347 dev_err(hdev->dev, "Failed to create internal CB pool\n"); 10348 rc = -ENOMEM; 10349 goto free_internal_cb_pool; 10350 } 10351 10352 rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr, 10353 HOST_SPACE_INTERNAL_CB_SZ, -1); 10354 if (rc) { 10355 dev_err(hdev->dev, "Failed to add memory to internal CB pool\n"); 10356 rc = -EFAULT; 10357 goto destroy_internal_cb_pool; 10358 } 10359 10360 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, 10361 HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 10362 10363 if (!hdev->internal_cb_va_base) { 10364 rc = -ENOMEM; 10365 goto destroy_internal_cb_pool; 10366 } 10367 10368 mutex_lock(&hdev->mmu_lock); 10369 10370 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, 10371 HOST_SPACE_INTERNAL_CB_SZ); 10372 if (rc) 10373 goto unreserve_internal_cb_pool; 10374 10375 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 10376 if (rc) 10377 goto unmap_internal_cb_pool; 10378 10379 mutex_unlock(&hdev->mmu_lock); 10380 10381 return 0; 10382 10383 unmap_internal_cb_pool: 10384 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 10385 unreserve_internal_cb_pool: 10386 mutex_unlock(&hdev->mmu_lock); 10387 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 10388 destroy_internal_cb_pool: 10389 gen_pool_destroy(hdev->internal_cb_pool); 10390 free_internal_cb_pool: 10391 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 10392 hdev->internal_cb_pool_dma_addr); 10393 10394 return rc; 10395 } 10396 10397 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx) 10398 { 10399 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10400 10401 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) 10402 return; 10403 10404 mutex_lock(&hdev->mmu_lock); 10405 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 10406 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 10407 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 10408 mutex_unlock(&hdev->mmu_lock); 10409 10410 
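	/* The MMU mapping and VA reservation were torn down above; all that is
	 * left is to destroy the internal CB pool allocator and free its host
	 * backing memory.
	 */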
gen_pool_destroy(hdev->internal_cb_pool); 10411 10412 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 10413 hdev->internal_cb_pool_dma_addr); 10414 } 10415 10416 static void gaudi2_restore_user_registers(struct hl_device *hdev) 10417 { 10418 gaudi2_restore_user_sm_registers(hdev); 10419 gaudi2_restore_user_qm_registers(hdev); 10420 } 10421 10422 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx) 10423 { 10424 struct hl_device *hdev = ctx->hdev; 10425 struct asic_fixed_properties *prop = &hdev->asic_prop; 10426 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10427 int rc; 10428 10429 rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START, 10430 gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true); 10431 if (rc) 10432 dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n", 10433 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START); 10434 10435 return rc; 10436 } 10437 10438 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx) 10439 { 10440 struct hl_device *hdev = ctx->hdev; 10441 struct asic_fixed_properties *prop = &hdev->asic_prop; 10442 int rc; 10443 10444 rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START, 10445 prop->pmmu.page_size, true); 10446 if (rc) 10447 dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n", 10448 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START); 10449 } 10450 10451 static int gaudi2_ctx_init(struct hl_ctx *ctx) 10452 { 10453 int rc; 10454 10455 rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid); 10456 if (rc) 10457 return rc; 10458 10459 /* No need to clear user registers if the device has just 10460 * performed reset, we restore only nic qm registers 10461 */ 10462 if (ctx->hdev->reset_upon_device_release) 10463 gaudi2_restore_nic_qm_registers(ctx->hdev); 10464 else 10465 gaudi2_restore_user_registers(ctx->hdev); 10466 10467 rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx); 10468 if (rc) 10469 return rc; 10470 10471 rc = gaudi2_map_virtual_msix_doorbell_memory(ctx); 10472 if (rc) 10473 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx); 10474 10475 return rc; 10476 } 10477 10478 static void gaudi2_ctx_fini(struct hl_ctx *ctx) 10479 { 10480 if (ctx->asid == HL_KERNEL_ASID_ID) 10481 return; 10482 10483 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx); 10484 10485 gaudi2_unmap_virtual_msix_doorbell_memory(ctx); 10486 } 10487 10488 static int gaudi2_pre_schedule_cs(struct hl_cs *cs) 10489 { 10490 struct hl_device *hdev = cs->ctx->hdev; 10491 int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1); 10492 u32 mon_payload, sob_id, mon_id; 10493 10494 if (!cs_needs_completion(cs)) 10495 return 0; 10496 10497 /* 10498 * First 64 SOB/MON are reserved for driver for QMAN auto completion 10499 * mechanism. Each SOB/MON pair are used for a pending CS with the same 10500 * cyclic index. The SOB value is increased when each of the CS jobs is 10501 * completed. When the SOB reaches the number of CS jobs, the monitor 10502 * generates MSI-X interrupt. 
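 * As a worked illustration (the index and job count here are assumed values,
 * not taken from the code): a pending CS at cyclic index 5 with 3 jobs uses
 * SOB/MON pair 5. The monitor is armed to trigger once SOB 5 reaches 3, and
 * its payload carries index 5 together with the SHADOW_INDEX_VALID and READY
 * bits, which matches what gaudi2_arm_cq_monitor() is handed below.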
10503 */ 10504 10505 sob_id = mon_id = index; 10506 mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) | 10507 (1 << CQ_ENTRY_READY_SHIFT) | index; 10508 10509 gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload, 10510 cs->jobs_cnt); 10511 10512 return 0; 10513 } 10514 10515 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 10516 { 10517 return HL_INVALID_QUEUE; 10518 } 10519 10520 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb) 10521 { 10522 struct hl_cb *cb = data; 10523 struct packet_msg_short *pkt; 10524 u32 value, ctl, pkt_size = sizeof(*pkt); 10525 10526 pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size); 10527 memset(pkt, 0, pkt_size); 10528 10529 /* Inc by 1, Mode ADD */ 10530 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 10531 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 10532 10533 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 10534 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */ 10535 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 10536 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb); 10537 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 10538 10539 pkt->value = cpu_to_le32(value); 10540 pkt->ctl = cpu_to_le32(ctl); 10541 10542 return size + pkt_size; 10543 } 10544 10545 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr) 10546 { 10547 u32 ctl, pkt_size = sizeof(*pkt); 10548 10549 memset(pkt, 0, pkt_size); 10550 10551 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr); 10552 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */ 10553 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 10554 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 10555 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0); 10556 10557 pkt->value = cpu_to_le32(value); 10558 pkt->ctl = cpu_to_le32(ctl); 10559 10560 return pkt_size; 10561 } 10562 10563 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt, 10564 u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr) 10565 { 10566 u32 ctl, value, pkt_size = sizeof(*pkt); 10567 u8 mask; 10568 10569 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 10570 dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask); 10571 return 0; 10572 } 10573 10574 memset(pkt, 0, pkt_size); 10575 10576 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 10577 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 10578 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL*/ 10579 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask); 10580 10581 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr); 10582 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */ 10583 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 10584 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 10585 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 10586 10587 pkt->value = cpu_to_le32(value); 10588 pkt->ctl = cpu_to_le32(ctl); 10589 10590 return pkt_size; 10591 } 10592 10593 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt) 10594 { 10595 u32 ctl, cfg, pkt_size = sizeof(*pkt); 10596 10597 memset(pkt, 0, pkt_size); 10598 10599 cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 10600 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 10601 cfg |= 
FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2); 10602 10603 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 10604 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0); 10605 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1); 10606 10607 pkt->cfg = cpu_to_le32(cfg); 10608 pkt->ctl = cpu_to_le32(ctl); 10609 10610 return pkt_size; 10611 } 10612 10613 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop) 10614 { 10615 struct hl_cb *cb = prop->data; 10616 void *buf = (void *) (uintptr_t) (cb->kernel_address); 10617 10618 u64 monitor_base, fence_addr = 0; 10619 u32 stream_index, size = prop->size; 10620 u16 msg_addr_offset; 10621 10622 stream_index = prop->q_idx % 4; 10623 fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] + 10624 QM_FENCE2_OFFSET + stream_index * 4; 10625 10626 /* 10627 * monitor_base should be the content of the base0 address registers, 10628 * so it will be added to the msg short offsets 10629 */ 10630 monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 10631 10632 /* First monitor config packet: low address of the sync */ 10633 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) - 10634 monitor_base; 10635 10636 size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset); 10637 10638 /* Second monitor config packet: high address of the sync */ 10639 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) - 10640 monitor_base; 10641 10642 size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset); 10643 10644 /* 10645 * Third monitor config packet: the payload, i.e. what to write when the 10646 * sync triggers 10647 */ 10648 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) - 10649 monitor_base; 10650 10651 size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset); 10652 10653 /* Fourth monitor config packet: bind the monitor to a sync object */ 10654 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base; 10655 10656 size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask, 10657 prop->sob_val, msg_addr_offset); 10658 10659 /* Fence packet */ 10660 size += gaudi2_add_fence_pkt(buf + size); 10661 10662 return size; 10663 } 10664 10665 static void gaudi2_reset_sob(struct hl_device *hdev, void *data) 10666 { 10667 struct hl_hw_sob *hw_sob = data; 10668 10669 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id); 10670 10671 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0); 10672 10673 kref_init(&hw_sob->kref); 10674 } 10675 10676 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group) 10677 { 10678 } 10679 10680 static u64 gaudi2_get_device_time(struct hl_device *hdev) 10681 { 10682 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32; 10683 10684 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL); 10685 } 10686 10687 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs) 10688 { 10689 return 0; 10690 } 10691 10692 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx, 10693 struct hl_cs *cs, u32 wait_queue_id, 10694 u32 collective_engine_id, u32 encaps_signal_offset) 10695 { 10696 return -EINVAL; 10697 } 10698 10699 /* 10700 * hl_mmu_scramble - converts a dram (non power of 2) page-size aligned address 10701 * to DMMU page-size address (64MB) before mapping it in 10702 * the MMU. 
10703 * The operation is performed on both the virtual and physical addresses. 10704 * for device with 6 HBMs the scramble is: 10705 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48] 10706 * 10707 * Example: 10708 * ============================================================================= 10709 * Allocated DRAM Reserved VA scrambled VA for MMU mapping Scrambled PA 10710 * Phys address in MMU last 10711 * HOP 10712 * ============================================================================= 10713 * PA1 0x3000000 VA1 0x9C000000 SVA1= (VA1/48M)*64M 0xD0000000 <- PA1/48M 0x1 10714 * PA2 0x9000000 VA2 0x9F000000 SVA2= (VA2/48M)*64M 0xD4000000 <- PA2/48M 0x3 10715 * ============================================================================= 10716 */ 10717 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr) 10718 { 10719 struct asic_fixed_properties *prop = &hdev->asic_prop; 10720 u32 divisor, mod_va; 10721 u64 div_va; 10722 10723 /* accept any address in the DRAM address space */ 10724 if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE, 10725 VA_HBM_SPACE_END)) { 10726 10727 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE; 10728 div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va); 10729 return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) | 10730 (div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) | 10731 (mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT); 10732 } 10733 10734 return raw_addr; 10735 } 10736 10737 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr) 10738 { 10739 struct asic_fixed_properties *prop = &hdev->asic_prop; 10740 u32 divisor, mod_va; 10741 u64 div_va; 10742 10743 /* accept any address in the DRAM address space */ 10744 if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE, 10745 VA_HBM_SPACE_END)) { 10746 10747 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE; 10748 div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, 10749 PAGE_SIZE_64MB, &mod_va); 10750 10751 return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) + 10752 (div_va * divisor + mod_va)); 10753 } 10754 10755 return scrambled_addr; 10756 } 10757 10758 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id) 10759 { 10760 u32 base = 0, dcore_id, dec_id; 10761 10762 if (core_id >= NUMBER_OF_DEC) { 10763 dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id); 10764 goto out; 10765 } 10766 10767 if (core_id < 8) { 10768 dcore_id = core_id / NUM_OF_DEC_PER_DCORE; 10769 dec_id = core_id % NUM_OF_DEC_PER_DCORE; 10770 10771 base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET + 10772 dec_id * DCORE_VDEC_OFFSET; 10773 } else { 10774 /* PCIe Shared Decoder */ 10775 base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET); 10776 } 10777 out: 10778 return base; 10779 } 10780 10781 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr, 10782 u32 *block_size, u32 *block_id) 10783 { 10784 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10785 int i; 10786 10787 for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) { 10788 if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) { 10789 *block_id = i; 10790 if (block_size) 10791 *block_size = gaudi2->mapped_blocks[i].size; 10792 return 0; 10793 } 10794 } 10795 10796 dev_err(hdev->dev, "Invalid block address %#llx", block_addr); 10797 10798 return -EINVAL; 10799 } 10800 10801 static int gaudi2_block_mmap(struct hl_device *hdev, 
struct vm_area_struct *vma, 10802 u32 block_id, u32 block_size) 10803 { 10804 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10805 u64 offset_in_bar; 10806 u64 address; 10807 int rc; 10808 10809 if (block_id >= NUM_USER_MAPPED_BLOCKS) { 10810 dev_err(hdev->dev, "Invalid block id %u", block_id); 10811 return -EINVAL; 10812 } 10813 10814 /* we allow mapping only an entire block */ 10815 if (block_size != gaudi2->mapped_blocks[block_id].size) { 10816 dev_err(hdev->dev, "Invalid block size %u", block_size); 10817 return -EINVAL; 10818 } 10819 10820 offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR; 10821 10822 address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar; 10823 10824 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 10825 VM_DONTCOPY | VM_NORESERVE); 10826 10827 rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, 10828 block_size, vma->vm_page_prot); 10829 if (rc) 10830 dev_err(hdev->dev, "remap_pfn_range error %d", rc); 10831 10832 return rc; 10833 } 10834 10835 static void gaudi2_enable_events_from_fw(struct hl_device *hdev) 10836 { 10837 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10838 10839 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 10840 u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq); 10841 10842 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) 10843 WREG32(irq_handler_offset, 10844 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id); 10845 } 10846 10847 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base) 10848 { 10849 switch (mmu_id) { 10850 case HW_CAP_DCORE0_DMMU0: 10851 *mmu_base = mmDCORE0_HMMU0_MMU_BASE; 10852 break; 10853 case HW_CAP_DCORE0_DMMU1: 10854 *mmu_base = mmDCORE0_HMMU1_MMU_BASE; 10855 break; 10856 case HW_CAP_DCORE0_DMMU2: 10857 *mmu_base = mmDCORE0_HMMU2_MMU_BASE; 10858 break; 10859 case HW_CAP_DCORE0_DMMU3: 10860 *mmu_base = mmDCORE0_HMMU3_MMU_BASE; 10861 break; 10862 case HW_CAP_DCORE1_DMMU0: 10863 *mmu_base = mmDCORE1_HMMU0_MMU_BASE; 10864 break; 10865 case HW_CAP_DCORE1_DMMU1: 10866 *mmu_base = mmDCORE1_HMMU1_MMU_BASE; 10867 break; 10868 case HW_CAP_DCORE1_DMMU2: 10869 *mmu_base = mmDCORE1_HMMU2_MMU_BASE; 10870 break; 10871 case HW_CAP_DCORE1_DMMU3: 10872 *mmu_base = mmDCORE1_HMMU3_MMU_BASE; 10873 break; 10874 case HW_CAP_DCORE2_DMMU0: 10875 *mmu_base = mmDCORE2_HMMU0_MMU_BASE; 10876 break; 10877 case HW_CAP_DCORE2_DMMU1: 10878 *mmu_base = mmDCORE2_HMMU1_MMU_BASE; 10879 break; 10880 case HW_CAP_DCORE2_DMMU2: 10881 *mmu_base = mmDCORE2_HMMU2_MMU_BASE; 10882 break; 10883 case HW_CAP_DCORE2_DMMU3: 10884 *mmu_base = mmDCORE2_HMMU3_MMU_BASE; 10885 break; 10886 case HW_CAP_DCORE3_DMMU0: 10887 *mmu_base = mmDCORE3_HMMU0_MMU_BASE; 10888 break; 10889 case HW_CAP_DCORE3_DMMU1: 10890 *mmu_base = mmDCORE3_HMMU1_MMU_BASE; 10891 break; 10892 case HW_CAP_DCORE3_DMMU2: 10893 *mmu_base = mmDCORE3_HMMU2_MMU_BASE; 10894 break; 10895 case HW_CAP_DCORE3_DMMU3: 10896 *mmu_base = mmDCORE3_HMMU3_MMU_BASE; 10897 break; 10898 case HW_CAP_PMMU: 10899 *mmu_base = mmPMMU_HBW_MMU_BASE; 10900 break; 10901 default: 10902 return -EINVAL; 10903 } 10904 10905 return 0; 10906 } 10907 10908 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id) 10909 { 10910 bool is_pmmu = (mmu_id == HW_CAP_PMMU); 10911 struct gaudi2_device *gaudi2 = hdev->asic_specific; 10912 u32 mmu_base; 10913 10914 if (!(gaudi2->hw_cap_initialized & mmu_id)) 10915 return; 10916 10917 if 
(gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base)) 10918 return; 10919 10920 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL); 10921 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu); 10922 } 10923 10924 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask) 10925 { 10926 u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES; 10927 10928 /* check all HMMUs */ 10929 for (i = 0 ; i < num_of_hmmus ; i++) { 10930 mmu_id = HW_CAP_DCORE0_DMMU0 << i; 10931 10932 if (mmu_cap_mask & mmu_id) 10933 gaudi2_ack_mmu_error(hdev, mmu_id); 10934 } 10935 10936 /* check PMMU */ 10937 if (mmu_cap_mask & HW_CAP_PMMU) 10938 gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU); 10939 10940 return 0; 10941 } 10942 10943 static void gaudi2_get_msi_info(__le32 *table) 10944 { 10945 table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX); 10946 } 10947 10948 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx) 10949 { 10950 switch (pll_idx) { 10951 case HL_GAUDI2_CPU_PLL: return CPU_PLL; 10952 case HL_GAUDI2_PCI_PLL: return PCI_PLL; 10953 case HL_GAUDI2_NIC_PLL: return NIC_PLL; 10954 case HL_GAUDI2_DMA_PLL: return DMA_PLL; 10955 case HL_GAUDI2_MESH_PLL: return MESH_PLL; 10956 case HL_GAUDI2_MME_PLL: return MME_PLL; 10957 case HL_GAUDI2_TPC_PLL: return TPC_PLL; 10958 case HL_GAUDI2_IF_PLL: return IF_PLL; 10959 case HL_GAUDI2_SRAM_PLL: return SRAM_PLL; 10960 case HL_GAUDI2_HBM_PLL: return HBM_PLL; 10961 case HL_GAUDI2_VID_PLL: return VID_PLL; 10962 case HL_GAUDI2_MSS_PLL: return MSS_PLL; 10963 default: return -EINVAL; 10964 } 10965 } 10966 10967 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map) 10968 { 10969 /* Not implemented */ 10970 return 0; 10971 } 10972 10973 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon) 10974 { 10975 /* Not implemented */ 10976 return 0; 10977 } 10978 10979 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset, 10980 struct hl_device *hdev, struct hl_mon_state_dump *mon) 10981 { 10982 /* Not implemented */ 10983 return 0; 10984 } 10985 10986 10987 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset, 10988 u64 status_base_offset, enum hl_sync_engine_type engine_type, 10989 u32 engine_id, char **buf, size_t *size, size_t *offset) 10990 { 10991 /* Not implemented */ 10992 return 0; 10993 } 10994 10995 10996 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = { 10997 .monitor_valid = gaudi2_monitor_valid, 10998 .print_single_monitor = gaudi2_print_single_monitor, 10999 .gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map, 11000 .print_fences_single_engine = gaudi2_print_fences_single_engine, 11001 }; 11002 11003 static void gaudi2_state_dump_init(struct hl_device *hdev) 11004 { 11005 /* Not implemented */ 11006 hdev->state_dump_specs.props = gaudi2_state_dump_specs_props; 11007 hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs; 11008 } 11009 11010 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id) 11011 { 11012 return 0; 11013 } 11014 11015 static u32 *gaudi2_get_stream_master_qid_arr(void) 11016 { 11017 return NULL; 11018 } 11019 11020 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, 11021 struct attribute_group *dev_vrm_attr_grp) 11022 { 11023 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp); 11024 hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp); 11025 } 11026 11027 static int gaudi2_mmu_get_real_page_size(struct 
static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
						u32 page_size, u32 *real_page_size, bool is_dram_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* for host pages the page size must be a multiple of the MMU page size */
	if (!is_dram_addr) {
		if (page_size % mmu_prop->page_size)
			goto page_size_err;

		*real_page_size = mmu_prop->page_size;
		return 0;
	}

	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
		goto page_size_err;

	/*
	 * The MMU page size differs from the DRAM page size (more precisely, the DMMU page is
	 * greater than the DRAM page). For this reason, work with the DRAM page size and let the
	 * MMU scrambling routine handle the mismatch when calculating the address to place in the
	 * MMU page table (the check above also guarantees that dram_page_size is not greater than
	 * the MMU page size).
	 */
	*real_page_size = prop->dram_page_size;

	return 0;

page_size_err:
	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
			page_size, mmu_prop->page_size >> 10);
	return -EFAULT;
}

static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}

int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_device_activity(hdev, open);
}

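/*
 * ASIC callback table through which the common habanalabs driver code operates the Gaudi2
 * device; entries set to NULL are not implemented for this ASIC.
 */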
static const struct hl_asic_funcs gaudi2_funcs = {
	.early_init = gaudi2_early_init,
	.early_fini = gaudi2_early_fini,
	.late_init = gaudi2_late_init,
	.late_fini = gaudi2_late_fini,
	.sw_init = gaudi2_sw_init,
	.sw_fini = gaudi2_sw_fini,
	.hw_init = gaudi2_hw_init,
	.hw_fini = gaudi2_hw_fini,
	.halt_engines = gaudi2_halt_engines,
	.suspend = gaudi2_suspend,
	.resume = gaudi2_resume,
	.mmap = gaudi2_mmap,
	.ring_doorbell = gaudi2_ring_doorbell,
	.pqe_write = gaudi2_pqe_write,
	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
	.scrub_device_mem = gaudi2_scrub_device_mem,
	.scrub_device_dram = gaudi2_scrub_device_dram,
	.get_int_queue_base = NULL,
	.test_queues = gaudi2_test_queues,
	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi2_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
	.asic_dma_map_single = gaudi2_dma_map_single,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi2_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = NULL,
	.update_eq_ci = gaudi2_update_eq_ci,
	.context_switch = gaudi2_context_switch,
	.restore_phase_topology = gaudi2_restore_phase_topology,
	.debugfs_read_dma = gaudi2_debugfs_read_dma,
	.add_device_attr = gaudi2_add_device_attr,
	.handle_eqe = gaudi2_handle_eqe,
	.get_events_stat = gaudi2_get_events_stat,
	.read_pte = NULL,
	.write_pte = NULL,
	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi2_send_heartbeat,
	.debug_coresight = gaudi2_debug_coresight,
	.is_device_idle = gaudi2_is_device_idle,
	.compute_reset_late_init = gaudi2_compute_reset_late_init,
	.hw_queues_lock = gaudi2_hw_queues_lock,
	.hw_queues_unlock = gaudi2_hw_queues_unlock,
	.get_pci_id = gaudi2_get_pci_id,
	.get_eeprom_data = gaudi2_get_eeprom_data,
	.get_monitor_dump = gaudi2_get_monitor_dump,
	.send_cpu_message = gaudi2_send_cpu_message,
	.pci_bars_map = gaudi2_pci_bars_map,
	.init_iatu = gaudi2_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi2_halt_coresight,
	.ctx_init = gaudi2_ctx_init,
	.ctx_fini = gaudi2_ctx_fini,
	.pre_schedule_cs = gaudi2_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
	.load_firmware_to_device = NULL,
	.load_boot_fit_to_device = NULL,
	.get_signal_cb_size = gaudi2_get_signal_cb_size,
	.get_wait_cb_size = gaudi2_get_wait_cb_size,
	.gen_signal_cb = gaudi2_gen_signal_cb,
	.gen_wait_cb = gaudi2_gen_wait_cb,
	.reset_sob = gaudi2_reset_sob,
	.reset_sob_group = gaudi2_reset_sob_group,
	.get_device_time = gaudi2_get_device_time,
	.pb_print_security_errors = gaudi2_pb_print_security_errors,
	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
	.get_dec_base_addr = gaudi2_get_dec_base_addr,
	.scramble_addr = gaudi2_mmu_scramble_addr,
	.descramble_addr = gaudi2_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
	.get_hw_block_id = gaudi2_get_hw_block_id,
	.hw_block_mmap = gaudi2_block_mmap,
	.enable_events_from_fw = gaudi2_enable_events_from_fw,
	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
	.get_msi_info = gaudi2_get_msi_info,
	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
	.init_firmware_loader = gaudi2_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = gaudi2_get_sob_addr,
	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
	.set_engine_cores = gaudi2_set_engine_cores,
	.set_engines = gaudi2_set_engines,
	.send_device_activity = gaudi2_send_device_activity,
	.set_dram_properties = gaudi2_set_dram_properties,
	.set_binning_masks = gaudi2_set_binning_masks,
};

void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi2_funcs;
}